User Inputs

# Bind the knitr report parameters (`params`) to top-level names that are
# referenced throughout the rest of this script.
output.var = params$output.var 

# Hard-coded switches not exposed via `params`:
# transform.abs / norm.pred select alternate response transformations —
# both fixed FALSE here (presumably experimental; TODO confirm).
transform.abs = FALSE
log.pred = params$log.pred
norm.pred = FALSE
eda = params$eda
# Per-algorithm on/off switches for the caret model-selection runs below.
algo.forward.caret = params$algo.forward.caret
algo.backward.caret = params$algo.backward.caret
algo.stepwise.caret = params$algo.stepwise.caret
algo.LASSO.caret = params$algo.LASSO.caret
algo.LARS.caret = params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 8
##  $ output.var         : chr "y3"
##  $ log.pred           : logi TRUE
##  $ eda                : logi FALSE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
# Name of the modeling target column: when predicting on the log scale the
# transformed column gets a '.log' suffix, otherwise the raw output variable
# is used as-is.
# Bug fix: the original `... else output.var.tr = output.var` is invalid —
# an `=` assignment cannot appear as the `else` expression of an `if`.
output.var.tr = if (log.pred) paste0(output.var, '.log') else output.var

Loading Data

# Read the feature matrix and the label table, then join them on JobName so
# each row pairs a job's predictors with its measured outputs.
feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
# Every feature column except the JobName key is a candidate predictor.
predictors = names(dplyr::select(feat,-JobName))
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

# Keep only complete cases, restricted to the predictors, the output
# variable and the JobName key.
cc  = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data,c('JobName',output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The output variable y3 shows right skewness, so we will proceed with a log transformation.

Histogram

# Histogram + kernel density of the raw output variable.
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

# Normal QQ plot of the raw output variable.
ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Transformation of the output variable from y3 to y3.log

# Apply the base-10 log transform to the output variable when requested,
# otherwise copy it unchanged into the `output.var.tr` column.
if(log.pred==TRUE) data[[output.var.tr]] = log(data[[output.var]],10) else
  data[[output.var.tr]] = data[[output.var]]
# Side-by-side histograms of the raw and transformed output.
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=2)

# QQ plots of the raw and transformed output, side by side.
ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Best Normalizator y3

Normalization of y3 using the bestNormalize package (which suggests orderNorm). This is interesting, but it goes beyond the objective of this project.

# Let bestNormalize pick the best normalizing transformation for the output
# (CV-estimated Pearson P / df statistic; lower means more normal).
t=bestNormalize::bestNormalize(data[[output.var]])
t
## Best Normalizing transformation with 6980 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - No transform: 2.9627 
##  - Box-Cox: 1.426 
##  - Log_b(x+a): 1.9884 
##  - sqrt(x+a): 2.4513 
##  - exp(x): 749.4167 
##  - arcsinh(x): 1.9884 
##  - Yeo-Johnson: 1.1169 
##  - orderNorm: 1.1737 
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 6980 nonmissing obs.:
##  Estimated statistics:
##  - lambda = -1.998639 
##  - mean (before standardization) = 0.5003083 
##  - sd (before standardization) = 5.108542e-06
qqnorm(data[[output.var]])  # before the chosen transformation

qqnorm(predict(t))  # after the chosen transformation

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformed values follow a normal distribution.

Predictors

All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

if (eda == TRUE){
  # Hand-picked predictors worth a closer look.
  cols = c('x11','x18','stat98','x7','stat110')
  df=gather(select_at(data,cols))
  # Bug fix: inside a braced block only the value of the LAST expression is
  # auto-printed at top level, so the histogram must be printed explicitly
  # (it was silently discarded before).
  print(
    ggplot(df, aes(value)) + 
      geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
      geom_density() + 
      # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
      facet_wrap(~key, scales = 'free',ncol=3)
  )
  
  # ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
  #   stat_qq()+
  #   facet_wrap(~key, scales = 'free',ncol=2)
  
  # Numeric summaries: last expression of the block, printed automatically.
  lapply(select_at(data,cols),summary)
}

Scatter plot vs. output variable y3.log

if (eda == TRUE){
  # Scatter of each selected predictor (`cols`, defined in the previous eda
  # chunk) vs the transformed output, with a smoother.
  d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
  # Last expression of the block, so the plot auto-prints at top level.
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light green',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=3)
}

All Predictors

Histograms

All indicators have a strong indication of Fat-Tails

if (eda == TRUE){
  # Histograms + densities for every predictor.
  df=gather(select_at(data,predictors))
  # Last expression of the block, so the plot auto-prints at top level.
  ggplot(df, aes(value)) + 
    geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
    geom_density() + 
    # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
    facet_wrap(~key, scales = 'free',ncol=4)
}

Correlations

With Output Variable

if (eda == TRUE){
  # Correlation of every predictor with the (transformed) output variable.
  #chart.Correlation(select(data,-JobName),  pch=21)
  t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                            ,select_at(data,output.var.tr)),4))  %>%
    rownames_to_column(var='variable') %>%
    # Safeguard: drop the raw output variable if it is still present
    # (it is normally removed from `data` during feature engineering).
    filter(variable != !!output.var) %>%
    # NOTE(review): column name hard-coded; assumes output.var.tr == 'y3.log'.
    arrange(-y3.log)
  #DT::datatable(t)
  message("Top Positive")
  # Bug fix: kable() results must be printed explicitly inside a braced
  # block — previously only the last expression was displayed, so the
  # "Top Positive" table never appeared.
  print(kable(head(arrange(t,desc(y3.log)),20)))
  message("Top Negative")
  print(kable(head(arrange(t,y3.log),20)))
}

Between All Variables

if (eda == TRUE){
  # Full variable-by-variable correlation matrix (only a 10x10 corner shown).
  #chart.Correlation(select(data,-JobName),  pch=21)
  t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
  #DT::datatable(t,options=list(scrollX=T))
  message("Showing only 10 variables")
  kable(t[1:10,1:10])
}

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3.log)

if (eda == TRUE){
  # Scatter of every predictor vs the transformed output, with a smoother.
  d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
  # Last expression of the block, so the plot auto-prints at top level.
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light blue',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=4)
}

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

if (eda == TRUE){
  # Variance inflation factors; large values would indicate multicollinearity.
  vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
  head(vifDF,15)
}

Feature Eng

  • Square Root transformation for x18
# Add a square-root transform of x18 (the raw x18 column is dropped below).
data.tr=data %>%
  mutate(x18.sqrt = sqrt(x18)) 
cols=c('x18','x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

# Scatter of x18 (raw and sqrt-transformed) vs the transformed output.
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#removing unwanted variables
# Drop the raw x18, the raw output (y3) and the join key; the remaining
# columns are what feeds the PCA / modeling sections.
data.tr=data.tr %>%
  dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])

# From here on `data` refers to the engineered dataset.
data=data.tr
label.names=output.var.tr

Modeling

PCA

# Interaction mode for the PCA design matrix:
#   0 - no interactions
#   1 - full 2-way interactions between all predictors
#   2 - 2-way interactions among controlled ('x*') variables only
#   3 - 3-way interactions among controlled ('x*') variables only
InteractionMode = 2

# All modeling columns except the label(s) enter the PCA.
pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]

if (InteractionMode == 1){
  # Full pairwise interactions between every predictor.
  pca.formula = as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
  #saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
  pca.model =  prcomp(x=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
}
if (InteractionMode >= 2 && InteractionMode <= 3){
  # Interactions are restricted to the controlled inputs (x*); the observed
  # statistics (stat*) enter as main effects only.
  controlled.vars = pca.vars[grep("^x",pca.vars)]
  stat.vars = pca.vars[grep("^stat",pca.vars)]
  
  # Mode 3 uses 3-way interactions, mode 2 uses 2-way (the original pair of
  # overlapping >= tests collapsed to exactly this).
  degree = if (InteractionMode == 3) '^3' else '^2'
  interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')',degree)
  no.interact.form = paste0(stat.vars, collapse ='+')
  
  pca.formula = as.formula(paste(interaction.form, no.interact.form, sep = "+"))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
}
# Keep enough leading principal components to explain this share of variance.
targetCumVar = .9

pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var) # proportion of variance
pca.model$cumpvar = cumsum(pca.model$pvar ) # cumulative proportion
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar # logical mask of kept PCs
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 193 PCAs justify 90.0% of the total Variance. (89.9%)
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
#creating dataset
# Final modeling frame: label column(s) + scores of the retained PCs.
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

# Shuffle rows, then 80/20 split via caTools::sample.split on the label.
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

plot.diagnostics <-  function(model, train) {
  # Regression diagnostics for a fitted lm `model` on its training data:
  # base-R diagnostic plots, studentized and standardized residual plots,
  # a residual histogram, leverage and Cook's-distance plots.
  # Returns the vector of Cook's distances.
  plot(model)
  
  r.standard = rstandard(model)  # internally studentized (standardized) residuals
  r.student = rstudent(model)    # externally studentized residuals
  # (the raw resid(model) vector is already shown by plot(model) above)
  
  # Studentized residuals vs predicted values.
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  
  # Standardized residuals vs predicted values, with +/-2 reference lines.
  # (Labels corrected: this plot shows r.standard, not the student residuals.)
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Standardized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Standardized Residual Plot")
  plot(p)
  
  # Histogram of studentized residuals against the standard normal density.
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures (summary deliberately not printed: too much output).
  inf.meas = influence.measures(model)
  
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage') + 
    xlab('Index')
  plot(p)
  # Cook's Distance; points above 15/n are labelled (a stricter cutoff than
  # the 4/n reference line, to keep the labels readable).
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=TRUE,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# Build the `seeds` list expected by caret::trainControl, so that (parallel)
# resampling is reproducible.
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # B is the number of resamples; each resample needs an integer vector of
  # length `numbers` (+ tuning-grid size, if any), plus one final seed for
  # the last fit on the full training set.
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # Bug fix: the original tested is.null(length) — `length` is the base
  # function and is never NULL, so that branch was unreachable and
  # vector(mode = "list", length = NULL) errored for unsupported methods.
  if(is.null(B)) {
    seeds <- NULL  # unsupported resampling method: let caret seed itself
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    # Extra element seeds the final model fit.
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  # Fit a model-selection / regularized regression via caret::train and print
  # method-specific diagnostics (metrics vs tuning parameter, residual plots,
  # selected coefficients).
  #
  # Args:
  #   formula        full model formula (subset methods select variables from it)
  #   data           training data.frame
  #   method         caret method: 'leapForward', 'leapBackward', 'leapSeq',
  #                  'glmnet' (with subopt = 'LASSO'), or 'lars'
  #   subopt         sub-option qualifying `method` (only 'LASSO' is used)
  #   feature.names  candidate predictor names (sizes the leap nvmax grid)
  #   train.control  optional trainControl; a seeded 10-fold CV is built when NULL
  #   tune.grid      optional tuning grid; a method-specific default when NULL
  #   pre.proc       optional preProcess spec passed through to caret::train
  #
  # Returns a list with the fitted model, best-model id (leaps only) and the
  # diagnostic ggplot objects.
  
  # Bug fix: `subopt == 'LASSO'` yields logical(0) when subopt is NULL, which
  # errors inside if(); identical() is NULL-safe.
  is.lasso = identical(method, 'glmnet') && identical(subopt, 'LASSO')
  is.leaps = method %in% c('leapForward', 'leapBackward', 'leapSeq')
  
  if(is.null(train.control)){
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  if(is.null(tune.grid)){
    if (is.leaps){
      tune.grid = data.frame(nvmax = 1:length(feature.names))
    }
    if (is.lasso){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # Use ~75% of the cores, leaving the rest for other tasks. floor()/max()
  # guard against a fractional or zero worker count (detectCores() * 0.75 is
  # rarely an integer).
  cl <- makeCluster(max(1, floor(detectCores() * 0.75)))
  registerDoParallel(cl)

  set.seed(1) 
  # The seed has to be set immediately before caret::train is called;
  # setting it earlier did not ensure reproducibility.
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  stopCluster(cl)
  registerDoSEQ() # back to the sequential engine
  
  if (is.leaps){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    # leap function does not support studentized residuals
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
   
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()  # bug fix: '+' was missing, so the theme was never applied
    plot(residHistogram)
    id = rownames(model.caret$bestTune)    
    # Provides the coefficients of the best model
    # regsubsets doesn't return a full model (see regsubsets documentation),
    # so the model is refitted with lm to get coefficient intervals
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,modelLM=mod))
  }
  if (is.lasso){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    print(model.caret$results)
    model=model.caret$finalModel
    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot 
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()  # bug fix: '+' was missing, so the theme was never applied
    plot(residHistogram)
    
    print("Coefficients") 
    # no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    # residHistogram added to the return list for consistency with the other branches.
    return(list(model = model.caret,id = id, residPlot = residPlot, metricsPlot=metricsPlot
                ,residHistogram=residHistogram))
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    # Metrics Plot
    dataPlot = model.caret$results %>%
        gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()  # bug fix: '+' was missing, so the theme was never applied
    plot(residHistogram)
    
    print("Coefficients") 
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]
    print(model.coef)
    id = NULL # not really needed but added for consistency
    # metricsPlot added to the return list for consistency with the other branches.
    return(list(model = model.caret,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,metricsPlot=metricsPlot))
  }
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returning the value in formula
predict.regsubsets <- function(object, newdata, id, formula, ...) {
  # Predict from a leaps::regsubsets fit: build the design matrix for the
  # supplied formula (this adds the intercept and expands any interaction
  # terms), pull the coefficients of the `id`-th model, and multiply the
  # matching design columns by them.
  design <- model.matrix(formula, newdata)
  betas <- coef(object, id = id)
  design[, names(betas)] %*% betas
}
  
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  # Evaluate a fitted model on the hold-out set: prints a summary of the
  # predictions and the test MSE, then returns an actual-vs-predicted plot on
  # the original response scale (back-transforming per log.pred / norm.pred).
  # NOTE(review): log.pred and norm.pred are read from the global environment.
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)
  
  # Bug fix: dispatch is now an else-if chain. The original ran every
  # comparison unconditionally, so a NULL `method` (or `subopt`) produced a
  # logical(0) condition and errored inside if().
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  } else if (method %in% c('leapForward','leapBackward','leapSeq')){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (identical(method, 'glmnet') && identical(subopt, 'LASSO')){
    xtest = as.matrix(test[,feature.names]) 
    pred=as.data.frame(predict(model, xtest))
  } else if (identical(method, 'lars')){
    pred=as.data.frame(predict(model, newdata = test))
  } else {
    stop("test.model: unsupported method '", method, "'")
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # Actual vs predicted on the transformed scale. Bug fix: a ggplot that is
    # neither printed nor returned is silently discarded inside a function —
    # the original never displayed this plot.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(
      ggplot(df,aes(x=x,y=y)) +
        geom_point(color='blue',alpha=0.5,shape=20,size=2) +
        geom_abline(slope=1,intercept=0,color='black',size=1) +
        #scale_y_continuous(limits=c(min(df),max(df)))+
        xlab("Actual (Transformed)")+
        ylab("Predicted (Transformed)")
    )
  }
    
  # Back-transform actual and predicted values to the original scale.
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]
    y = 10^pred[,1]  
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  # Returned value (auto-printed at top level): actual vs predicted with
  # +/-good (green) and +/-ok (red) tolerance lines through the origin.
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted") 
}

Setup Formulae

# Build `label ~ all remaining columns` formula from the training-set names.
n <- names(data.train)
 formula <- as.formula(paste(paste(n[n %in% label.names], collapse = " + ")
                             ," ~", paste(n[!n %in% label.names], collapse = " + "))) 

# Intercept-only (grand-mean) baseline formula.
grand.mean.formula = as.formula(paste(paste(n[n %in% label.names], collapse = " + ")," ~ 1"))

print(formula)
## y3.log ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + 
##     PC10 + PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + 
##     PC19 + PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 + 
##     PC28 + PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 + 
##     PC37 + PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 + 
##     PC46 + PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 + 
##     PC55 + PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 + 
##     PC64 + PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 + 
##     PC73 + PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 + 
##     PC82 + PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 + 
##     PC91 + PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 + 
##     PC100 + PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 + 
##     PC108 + PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 + 
##     PC116 + PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 + 
##     PC124 + PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 + 
##     PC132 + PC133 + PC134 + PC135 + PC136 + PC137 + PC138 + PC139 + 
##     PC140 + PC141 + PC142 + PC143 + PC144 + PC145 + PC146 + PC147 + 
##     PC148 + PC149 + PC150 + PC151 + PC152 + PC153 + PC154 + PC155 + 
##     PC156 + PC157 + PC158 + PC159 + PC160 + PC161 + PC162 + PC163 + 
##     PC164 + PC165 + PC166 + PC167 + PC168 + PC169 + PC170 + PC171 + 
##     PC172 + PC173 + PC174 + PC175 + PC176 + PC177 + PC178 + PC179 + 
##     PC180 + PC181 + PC182 + PC183 + PC184 + PC185 + PC186 + PC187 + 
##     PC188 + PC189 + PC190 + PC191 + PC192 + PC193
print(grand.mean.formula)
## y3.log ~ 1
# Update feature.names because we may have transformed some features
# (after PCA it lists the retained principal components, not raw predictors).
feature.names = n[!n %in% label.names]

Full Model

# Baseline OLS fit on all retained principal components.
model.full = lm(formula , data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.087715 -0.021207 -0.004992  0.016357  0.194543 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.097e+00  4.267e-04 4913.714  < 2e-16 ***
## PC1         -9.046e-04  9.152e-05   -9.885  < 2e-16 ***
## PC2          1.214e-03  1.010e-04   12.026  < 2e-16 ***
## PC3         -2.803e-04  1.108e-04   -2.531 0.011400 *  
## PC4          1.514e-04  1.128e-04    1.343 0.179407    
## PC5          6.094e-04  1.128e-04    5.400 6.93e-08 ***
## PC6          1.131e-04  1.147e-04    0.986 0.324354    
## PC7         -4.970e-04  1.159e-04   -4.289 1.83e-05 ***
## PC8         -3.138e-04  1.176e-04   -2.669 0.007634 ** 
## PC9         -2.175e-04  1.209e-04   -1.798 0.072161 .  
## PC10         2.513e-04  1.228e-04    2.047 0.040676 *  
## PC11        -1.429e-03  1.256e-04  -11.378  < 2e-16 ***
## PC12         3.743e-04  1.263e-04    2.964 0.003050 ** 
## PC13         4.544e-04  1.291e-04    3.520 0.000434 ***
## PC14         1.544e-03  1.306e-04   11.824  < 2e-16 ***
## PC15        -4.195e-04  1.321e-04   -3.175 0.001506 ** 
## PC16         7.432e-04  1.363e-04    5.453 5.16e-08 ***
## PC17        -2.039e-04  1.398e-04   -1.458 0.144782    
## PC18         2.628e-04  1.429e-04    1.839 0.066011 .  
## PC19        -2.048e-04  1.477e-04   -1.386 0.165708    
## PC20         9.424e-04  1.537e-04    6.133 9.26e-10 ***
## PC21        -1.014e-03  1.589e-04   -6.383 1.88e-10 ***
## PC22         4.106e-03  1.658e-04   24.769  < 2e-16 ***
## PC23        -5.377e-04  3.308e-04   -1.626 0.104059    
## PC24        -1.182e-03  3.654e-04   -3.235 0.001224 ** 
## PC25         6.342e-04  3.669e-04    1.729 0.083939 .  
## PC26        -3.130e-04  3.644e-04   -0.859 0.390452    
## PC27        -4.701e-04  3.664e-04   -1.283 0.199533    
## PC28         6.822e-04  3.685e-04    1.851 0.064161 .  
## PC29        -6.829e-04  3.682e-04   -1.855 0.063664 .  
## PC30         3.572e-04  3.729e-04    0.958 0.338130    
## PC31         8.227e-05  3.764e-04    0.219 0.826999    
## PC32         1.198e-03  3.743e-04    3.200 0.001382 ** 
## PC33         3.023e-04  3.679e-04    0.822 0.411266    
## PC34         2.586e-04  3.756e-04    0.689 0.491081    
## PC35         7.535e-05  3.718e-04    0.203 0.839391    
## PC36         5.147e-04  3.734e-04    1.378 0.168136    
## PC37        -3.376e-04  3.758e-04   -0.898 0.369120    
## PC38         4.284e-04  3.774e-04    1.135 0.256347    
## PC39         3.449e-04  3.782e-04    0.912 0.361781    
## PC40         3.887e-04  3.777e-04    1.029 0.303494    
## PC41        -6.627e-04  3.780e-04   -1.753 0.079627 .  
## PC42        -4.854e-04  3.796e-04   -1.279 0.201082    
## PC43         9.553e-04  3.763e-04    2.539 0.011152 *  
## PC44        -3.401e-04  3.777e-04   -0.900 0.367990    
## PC45         1.398e-03  3.798e-04    3.682 0.000234 ***
## PC46         1.693e-03  3.843e-04    4.406 1.07e-05 ***
## PC47         3.140e-04  3.854e-04    0.815 0.415260    
## PC48        -3.535e-04  3.827e-04   -0.924 0.355739    
## PC49         3.876e-04  3.832e-04    1.011 0.311841    
## PC50        -6.684e-04  3.831e-04   -1.744 0.081132 .  
## PC51         4.509e-04  3.867e-04    1.166 0.243714    
## PC52         1.715e-04  3.854e-04    0.445 0.656406    
## PC53        -3.577e-04  3.877e-04   -0.923 0.356205    
## PC54         9.373e-05  3.894e-04    0.241 0.809799    
## PC55         2.034e-04  3.910e-04    0.520 0.602896    
## PC56         1.708e-04  3.875e-04    0.441 0.659470    
## PC57         4.296e-04  3.883e-04    1.106 0.268717    
## PC58        -4.297e-04  3.919e-04   -1.097 0.272899    
## PC59         1.491e-03  3.898e-04    3.824 0.000133 ***
## PC60        -1.019e-03  3.901e-04   -2.613 0.008994 ** 
## PC61        -5.935e-04  3.926e-04   -1.512 0.130668    
## PC62        -2.053e-04  3.922e-04   -0.523 0.600672    
## PC63         4.817e-04  3.916e-04    1.230 0.218790    
## PC64        -1.757e-04  3.928e-04   -0.447 0.654661    
## PC65         9.852e-04  3.926e-04    2.510 0.012115 *  
## PC66         4.366e-04  3.975e-04    1.098 0.272073    
## PC67         6.367e-04  3.933e-04    1.619 0.105549    
## PC68         7.810e-04  3.948e-04    1.978 0.047946 *  
## PC69        -1.749e-03  3.972e-04   -4.405 1.08e-05 ***
## PC70         2.389e-04  3.966e-04    0.602 0.546924    
## PC71        -6.239e-04  3.962e-04   -1.575 0.115418    
## PC72         5.217e-04  3.995e-04    1.306 0.191735    
## PC73         4.986e-04  3.979e-04    1.253 0.210199    
## PC74        -1.640e-04  4.004e-04   -0.410 0.682084    
## PC75         3.123e-04  4.007e-04    0.779 0.435755    
## PC76         1.269e-03  3.998e-04    3.174 0.001514 ** 
## PC77        -6.029e-05  3.996e-04   -0.151 0.880078    
## PC78        -3.975e-04  4.018e-04   -0.989 0.322629    
## PC79         7.739e-04  4.019e-04    1.926 0.054181 .  
## PC80        -6.520e-04  4.008e-04   -1.627 0.103854    
## PC81        -1.400e-04  4.033e-04   -0.347 0.728608    
## PC82        -9.804e-04  3.995e-04   -2.454 0.014160 *  
## PC83         1.094e-03  4.031e-04    2.715 0.006654 ** 
## PC84        -3.660e-05  4.050e-04   -0.090 0.927997    
## PC85         1.667e-04  4.050e-04    0.412 0.680592    
## PC86         3.579e-04  4.056e-04    0.882 0.377668    
## PC87         4.440e-04  4.062e-04    1.093 0.274426    
## PC88         2.336e-04  4.033e-04    0.579 0.562490    
## PC89         3.819e-04  4.072e-04    0.938 0.348344    
## PC90        -2.006e-03  4.094e-04   -4.899 9.94e-07 ***
## PC91         6.582e-04  4.068e-04    1.618 0.105744    
## PC92        -5.238e-04  4.079e-04   -1.284 0.199218    
## PC93         2.048e-04  4.085e-04    0.501 0.616142    
## PC94         5.523e-04  4.079e-04    1.354 0.175805    
## PC95        -2.440e-04  4.097e-04   -0.596 0.551526    
## PC96        -6.906e-04  4.115e-04   -1.678 0.093368 .  
## PC97        -1.299e-04  4.110e-04   -0.316 0.751929    
## PC98         5.237e-04  4.112e-04    1.274 0.202780    
## PC99         1.255e-04  4.087e-04    0.307 0.758705    
## PC100        7.239e-05  4.100e-04    0.177 0.859873    
## PC101       -1.334e-03  4.095e-04   -3.257 0.001133 ** 
## PC102        1.545e-03  4.125e-04    3.746 0.000181 ***
## PC103        3.948e-05  4.134e-04    0.095 0.923931    
## PC104        7.215e-04  4.136e-04    1.745 0.081093 .  
## PC105        1.671e-04  4.130e-04    0.405 0.685788    
## PC106        1.325e-03  4.144e-04    3.198 0.001390 ** 
## PC107        1.844e-04  4.151e-04    0.444 0.656941    
## PC108       -1.013e-03  4.165e-04   -2.433 0.015023 *  
## PC109       -4.985e-05  4.191e-04   -0.119 0.905311    
## PC110        7.128e-05  4.171e-04    0.171 0.864314    
## PC111       -6.842e-04  4.186e-04   -1.634 0.102253    
## PC112        7.254e-04  4.209e-04    1.724 0.084850 .  
## PC113        5.604e-05  4.183e-04    0.134 0.893422    
## PC114       -4.828e-04  4.218e-04   -1.145 0.252381    
## PC115       -7.532e-04  4.218e-04   -1.786 0.074200 .  
## PC116        3.324e-05  4.200e-04    0.079 0.936926    
## PC117       -1.730e-03  4.186e-04   -4.132 3.66e-05 ***
## PC118       -1.056e-03  4.227e-04   -2.498 0.012526 *  
## PC119       -1.002e-03  4.256e-04   -2.355 0.018555 *  
## PC120       -6.511e-04  4.226e-04   -1.541 0.123452    
## PC121        7.328e-04  4.253e-04    1.723 0.084970 .  
## PC122       -1.922e-05  4.259e-04   -0.045 0.964001    
## PC123       -7.914e-04  4.281e-04   -1.849 0.064562 .  
## PC124       -2.738e-04  4.247e-04   -0.645 0.519151    
## PC125        3.212e-04  4.264e-04    0.753 0.451408    
## PC126        9.483e-04  4.256e-04    2.228 0.025920 *  
## PC127        1.396e-03  4.264e-04    3.273 0.001069 ** 
## PC128       -8.822e-04  4.287e-04   -2.058 0.039646 *  
## PC129       -8.384e-04  4.315e-04   -1.943 0.052042 .  
## PC130        6.387e-06  4.312e-04    0.015 0.988183    
## PC131        1.028e-03  4.256e-04    2.416 0.015729 *  
## PC132        1.533e-03  4.297e-04    3.567 0.000365 ***
## PC133       -2.147e-04  4.267e-04   -0.503 0.614850    
## PC134       -1.882e-04  4.290e-04   -0.439 0.660832    
## PC135       -5.057e-04  4.327e-04   -1.169 0.242544    
## PC136        1.449e-04  4.334e-04    0.334 0.738040    
## PC137        3.843e-04  4.342e-04    0.885 0.376114    
## PC138        1.134e-03  4.294e-04    2.640 0.008308 ** 
## PC139       -1.315e-03  4.325e-04   -3.041 0.002371 ** 
## PC140        1.072e-04  4.349e-04    0.246 0.805328    
## PC141        6.595e-05  4.341e-04    0.152 0.879271    
## PC142        5.539e-04  4.361e-04    1.270 0.204078    
## PC143       -1.026e-03  4.374e-04   -2.345 0.019060 *  
## PC144       -5.879e-05  4.374e-04   -0.134 0.893087    
## PC145        5.103e-04  4.378e-04    1.166 0.243784    
## PC146       -3.682e-04  4.360e-04   -0.845 0.398374    
## PC147        9.961e-04  4.378e-04    2.275 0.022926 *  
## PC148        7.951e-04  4.398e-04    1.808 0.070690 .  
## PC149       -3.656e-04  4.377e-04   -0.835 0.403647    
## PC150        3.346e-04  4.421e-04    0.757 0.449211    
## PC151        6.576e-04  4.420e-04    1.488 0.136903    
## PC152       -5.093e-05  4.409e-04   -0.116 0.908035    
## PC153       -2.354e-04  4.441e-04   -0.530 0.596168    
## PC154        5.636e-04  4.411e-04    1.278 0.201427    
## PC155        6.757e-04  4.435e-04    1.523 0.127721    
## PC156       -1.351e-03  4.476e-04   -3.019 0.002544 ** 
## PC157        1.113e-03  4.459e-04    2.496 0.012587 *  
## PC158       -2.980e-04  4.456e-04   -0.669 0.503698    
## PC159       -3.976e-04  4.432e-04   -0.897 0.369729    
## PC160        1.431e-03  4.431e-04    3.230 0.001244 ** 
## PC161       -1.794e-04  4.469e-04   -0.401 0.688199    
## PC162        4.193e-04  4.457e-04    0.941 0.346912    
## PC163        5.034e-04  4.461e-04    1.128 0.259222    
## PC164       -3.660e-04  4.493e-04   -0.815 0.415365    
## PC165       -1.269e-04  4.488e-04   -0.283 0.777429    
## PC166       -4.931e-04  4.491e-04   -1.098 0.272215    
## PC167        4.717e-04  4.481e-04    1.053 0.292534    
## PC168       -1.737e-04  4.529e-04   -0.383 0.701399    
## PC169       -8.749e-05  4.501e-04   -0.194 0.845892    
## PC170       -7.141e-04  4.498e-04   -1.587 0.112458    
## PC171        6.442e-04  4.482e-04    1.437 0.150706    
## PC172        5.174e-04  4.526e-04    1.143 0.252926    
## PC173       -1.108e-03  4.533e-04   -2.445 0.014506 *  
## PC174        9.658e-04  4.522e-04    2.136 0.032745 *  
## PC175        7.062e-04  4.552e-04    1.552 0.120827    
## PC176        4.938e-05  4.551e-04    0.109 0.913602    
## PC177       -2.200e-05  4.542e-04   -0.048 0.961373    
## PC178       -1.392e-04  4.575e-04   -0.304 0.760932    
## PC179       -1.537e-03  4.579e-04   -3.356 0.000795 ***
## PC180        1.095e-03  4.604e-04    2.378 0.017450 *  
## PC181       -8.740e-04  4.578e-04   -1.909 0.056275 .  
## PC182       -2.303e-04  4.600e-04   -0.501 0.616572    
## PC183       -7.055e-04  4.588e-04   -1.538 0.124201    
## PC184       -5.284e-04  4.610e-04   -1.146 0.251798    
## PC185       -7.863e-05  4.588e-04   -0.171 0.863924    
## PC186        9.955e-04  4.627e-04    2.152 0.031464 *  
## PC187        1.189e-03  4.607e-04    2.580 0.009909 ** 
## PC188       -7.399e-04  4.623e-04   -1.601 0.109541    
## PC189        9.628e-04  4.671e-04    2.061 0.039332 *  
## PC190       -3.722e-04  4.624e-04   -0.805 0.420845    
## PC191       -4.225e-04  4.623e-04   -0.914 0.360807    
## PC192       -1.850e-03  4.659e-04   -3.971 7.25e-05 ***
## PC193        2.356e-04  4.682e-04    0.503 0.614862    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03177 on 5390 degrees of freedom
## Multiple R-squared:  0.2579, Adjusted R-squared:  0.2314 
## F-statistic: 9.707 on 193 and 5390 DF,  p-value: < 2.2e-16
# Diagnostic plots + Cook's distances for the full PCA-regression model
cd.full <- plot.diagnostics(model = model.full, train = data.train)

## [1] "Number of data points that have Cook's D > 4/n: 275"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking the model after removal of high-influence points

# Rows whose Cook's distance exceeds the common 4/n rule-of-thumb cutoff
high.cd <- names(cd.full[cd.full > 4 / nrow(data.train)])

# Save dataset with high.cd flagged
# (NOTE(review): `t` shadows base::t() from here on; kept for compatibility)
t <- data.train %>%
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd, 1, 0))
#write.csv(t,file='data_high_cd_flag.csv',row.names = F)
###
# Refit the full model with the high-influence rows dropped
# (explicit parens: %in% binds tighter than !, so this negates the membership test)
data.train2 <- data.train[!(rownames(data.train) %in% high.cd), ]
model.full2 <- lm(formula, data = data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076433 -0.020875 -0.002849  0.019196  0.085203 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.094e+00  3.952e-04 5298.868  < 2e-16 ***
## PC1          2.697e-04  2.660e-05   10.136  < 2e-16 ***
## PC2         -3.831e-04  3.141e-05  -12.199  < 2e-16 ***
## PC3         -1.364e-04  3.758e-05   -3.630 0.000286 ***
## PC4         -6.954e-05  3.854e-05   -1.804 0.071215 .  
## PC5         -2.346e-04  3.883e-05   -6.040 1.64e-09 ***
## PC6          8.715e-05  3.944e-05    2.210 0.027177 *  
## PC7         -1.779e-04  4.021e-05   -4.424 9.90e-06 ***
## PC8         -1.058e-04  4.136e-05   -2.558 0.010567 *  
## PC9          1.096e-04  4.285e-05    2.558 0.010558 *  
## PC10         2.573e-05  4.402e-05    0.585 0.558867    
## PC11         4.992e-04  4.558e-05   10.951  < 2e-16 ***
## PC12        -3.876e-04  4.597e-05   -8.430  < 2e-16 ***
## PC13         2.583e-04  4.748e-05    5.441 5.54e-08 ***
## PC14         6.212e-04  4.819e-05   12.892  < 2e-16 ***
## PC15        -1.272e-04  4.948e-05   -2.571 0.010175 *  
## PC16         1.897e-04  5.160e-05    3.676 0.000239 ***
## PC17        -6.545e-05  5.338e-05   -1.226 0.220173    
## PC18        -7.167e-05  5.569e-05   -1.287 0.198158    
## PC19        -1.125e-04  5.800e-05   -1.939 0.052566 .  
## PC20         4.207e-04  6.204e-05    6.781 1.32e-11 ***
## PC21        -2.845e-04  6.453e-05   -4.409 1.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02879 on 5292 degrees of freedom
## Multiple R-squared:  0.1335, Adjusted R-squared:  0.1301 
## F-statistic: 38.83 on 21 and 5292 DF,  p-value: < 2.2e-16
# Re-run diagnostics on the refit model (named args, matching the earlier call)
cd.full2 <- plot.diagnostics(model = model.full2, train = data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 202"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before.
# Checking to see if distributions are different and if so which variables
# High Leverage Plot
plotData <- data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  # all_of() replaces superseded one_of(); errors (rather than warns) on missing columns
  dplyr::select(type, target = all_of(label.names))

ggplot(data = plotData, aes(x = type, y = target)) +
  geom_boxplot(fill = 'light blue', outlier.shape = NA) +
  # `labels=` spelled out (original relied on partial matching via `label=`)
  scale_y_continuous(name = "Target Variable Values", labels = scales::comma_format(accuracy = .1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# 2 sample t-tests

plotData <- data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  # all_of() replaces superseded one_of(); errors (rather than warns) on missing columns
  dplyr::select(type, all_of(feature.names))

# Equal-variance two-sample t-test of each feature: High vs Normal leverage group
comp.test <- lapply(dplyr::select(plotData, all_of(feature.names)),
                    function(x) t.test(x ~ plotData$type, var.equal = TRUE))

# Keep only the features whose group means differ significantly (p < 0.05)
sig.comp <- list.filter(comp.test, p.value < 0.05)
# vapply (not sapply): guarantees a named numeric vector even if sig.comp is empty
vapply(sig.comp, function(x) x[['p.value']], numeric(1))
##          PC1         PC14         PC35         PC46         PC70         PC86        PC115        PC119        PC133 
## 1.565306e-06 2.699042e-04 3.139225e-02 2.067661e-02 1.226728e-02 4.405414e-03 4.351945e-02 7.991546e-03 4.418544e-02 
##        PC143        PC150        PC169 
## 2.132315e-02 2.579840e-02 3.502265e-02
# Box plots of only the significantly different PCs, split by leverage group
mm <- melt(plotData, id = c('type')) %>% filter(variable %in% names(sig.comp))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 5, scales = 'free_y') +
  # `labels=` spelled out (original relied on partial matching via `label=`)
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) Plots — every feature, split by leverage group
mm <- melt(plotData, id = c('type'))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 8, scales = 'free_y') +
  # `labels=` spelled out (original relied on partial matching via `label=`)
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

# Intercept-only (grand mean) baseline model for later comparison
model.null <- lm(grand.mean.formula, data = data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.114676 -0.023705 -0.003387  0.020847  0.190636 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.096552   0.000485    4323   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03624 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

# Forward selection via caret (leapForward), only when enabled in params
if (algo.forward.caret) {
  set.seed(1)
  returned <- train.caret.glmselect(
    formula = formula,
    data = data.train,
    method = "leapForward",
    feature.names = feature.names
  )
  model.forward <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 165 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.03466409 0.08593606 0.02690290 0.001099875 0.02279859 0.0006370046
## 2       2 0.03444082 0.09774700 0.02676647 0.001072722 0.02436339 0.0005267890
## 3       3 0.03407079 0.11785519 0.02643370 0.001163325 0.03529461 0.0006039663
## 4       4 0.03360175 0.14159740 0.02603125 0.001128829 0.03712006 0.0006157767
## 5       5 0.03334636 0.15519820 0.02585069 0.001161894 0.04142363 0.0006092038
## 6       6 0.03329792 0.15764518 0.02580989 0.001164457 0.04105992 0.0006217137
## 7       7 0.03321062 0.16192922 0.02580111 0.001118805 0.04024003 0.0006483500
## 8       8 0.03312435 0.16641525 0.02571374 0.001182956 0.04135877 0.0006559573
## 9       9 0.03307574 0.16922910 0.02568986 0.001244109 0.04504879 0.0007230730
## 10     10 0.03303978 0.17078808 0.02565820 0.001240144 0.04278899 0.0007128830
## 11     11 0.03300951 0.17214185 0.02560873 0.001182527 0.04066502 0.0006407614
## 12     12 0.03297259 0.17391369 0.02556379 0.001151144 0.04045896 0.0005755534
## 13     13 0.03290594 0.17734479 0.02553116 0.001187443 0.04207657 0.0006026256
## 14     14 0.03288937 0.17815481 0.02550390 0.001186010 0.04148991 0.0006088042
## 15     15 0.03289863 0.17779109 0.02549328 0.001181655 0.04154013 0.0005901351
## 16     16 0.03290108 0.17770618 0.02551469 0.001177906 0.04159493 0.0006027152
## 17     17 0.03291394 0.17710046 0.02551820 0.001175968 0.03925230 0.0005886936
## 18     18 0.03290527 0.17748093 0.02551383 0.001175942 0.03807660 0.0006100222
## 19     19 0.03290986 0.17743033 0.02552582 0.001190951 0.03969890 0.0006145930
## 20     20 0.03289854 0.17787885 0.02547556 0.001162280 0.03714049 0.0006057325
## 21     21 0.03291432 0.17732975 0.02547539 0.001168697 0.03664888 0.0006219098
## 22     22 0.03294952 0.17580765 0.02549812 0.001195873 0.03646541 0.0006118553
## 23     23 0.03294826 0.17610748 0.02548558 0.001213664 0.03650773 0.0006299920
## 24     24 0.03292027 0.17754091 0.02546476 0.001231337 0.03800989 0.0006565158
## 25     25 0.03294866 0.17630971 0.02550823 0.001208678 0.03719119 0.0006546398
## 26     26 0.03290973 0.17806409 0.02548409 0.001204610 0.03653682 0.0006528609
## 27     27 0.03287192 0.17989069 0.02544376 0.001175764 0.03541137 0.0006383029
## 28     28 0.03286223 0.18051640 0.02544539 0.001187682 0.03553025 0.0006518692
## 29     29 0.03284787 0.18133215 0.02542458 0.001189066 0.03625150 0.0006592356
## 30     30 0.03285112 0.18115590 0.02541177 0.001192119 0.03536294 0.0006746716
## 31     31 0.03284241 0.18175920 0.02539103 0.001198299 0.03624295 0.0006670106
## 32     32 0.03282690 0.18253951 0.02539303 0.001210703 0.03615870 0.0006677734
## 33     33 0.03283039 0.18232589 0.02539652 0.001173610 0.03452840 0.0006383590
## 34     34 0.03282857 0.18267519 0.02538616 0.001180247 0.03517787 0.0006573371
## 35     35 0.03279388 0.18435562 0.02535449 0.001203981 0.03677862 0.0006728414
## 36     36 0.03280300 0.18404794 0.02535279 0.001196797 0.03609555 0.0006771798
## 37     37 0.03277155 0.18548928 0.02533325 0.001177708 0.03594142 0.0006724391
## 38     38 0.03274453 0.18687692 0.02530497 0.001179260 0.03636114 0.0006871403
## 39     39 0.03270205 0.18898426 0.02525424 0.001228301 0.03715431 0.0007396072
## 40     40 0.03266982 0.19048854 0.02522992 0.001205831 0.03588633 0.0007149534
## 41     41 0.03264796 0.19155348 0.02522275 0.001198048 0.03566628 0.0007077642
## 42     42 0.03264541 0.19165742 0.02521095 0.001210882 0.03489479 0.0007301137
## 43     43 0.03266333 0.19080410 0.02522898 0.001208845 0.03386349 0.0007345204
## 44     44 0.03264611 0.19174176 0.02520616 0.001206329 0.03413585 0.0007358896
## 45     45 0.03262621 0.19268552 0.02518291 0.001239855 0.03427913 0.0007560578
## 46     46 0.03258768 0.19452160 0.02514674 0.001251096 0.03422791 0.0007698973
## 47     47 0.03259438 0.19436330 0.02514636 0.001260866 0.03397058 0.0007785031
## 48     48 0.03259454 0.19429819 0.02515352 0.001243649 0.03298438 0.0007657942
## 49     49 0.03258654 0.19466559 0.02514511 0.001227054 0.03227825 0.0007461070
## 50     50 0.03258122 0.19496342 0.02514312 0.001203495 0.03138270 0.0007384793
## 51     51 0.03258135 0.19498368 0.02514561 0.001208911 0.03093455 0.0007297054
## 52     52 0.03257773 0.19516069 0.02513577 0.001195345 0.03114276 0.0007188847
## 53     53 0.03255960 0.19602272 0.02510879 0.001191656 0.03074620 0.0007114939
## 54     54 0.03255429 0.19638389 0.02509214 0.001205229 0.03120553 0.0007399053
## 55     55 0.03256109 0.19607996 0.02508365 0.001195712 0.03091888 0.0007274028
## 56     56 0.03257292 0.19553428 0.02509034 0.001158129 0.03007109 0.0007056556
## 57     57 0.03257119 0.19576834 0.02507842 0.001182285 0.03064198 0.0007179040
## 58     58 0.03256927 0.19596230 0.02508886 0.001199746 0.03131415 0.0007206673
## 59     59 0.03256238 0.19638145 0.02507638 0.001205026 0.03212678 0.0007232190
## 60     60 0.03255904 0.19645760 0.02507619 0.001176347 0.03119313 0.0007005191
## 61     61 0.03257115 0.19598378 0.02509872 0.001186340 0.03098980 0.0007018083
## 62     62 0.03256850 0.19614629 0.02508209 0.001193289 0.03116293 0.0007123351
## 63     63 0.03256878 0.19614879 0.02506476 0.001178011 0.03128179 0.0006987796
## 64     64 0.03258160 0.19564883 0.02507713 0.001171820 0.03110950 0.0006973617
## 65     65 0.03257337 0.19603401 0.02507616 0.001178181 0.03109982 0.0006927481
## 66     66 0.03256289 0.19651759 0.02507240 0.001152406 0.03077011 0.0006759460
## 67     67 0.03255547 0.19674709 0.02506810 0.001111977 0.02912399 0.0006342699
## 68     68 0.03256352 0.19643102 0.02508041 0.001099121 0.02880712 0.0006403913
## 69     69 0.03258474 0.19558907 0.02509238 0.001095843 0.02957743 0.0006382675
## 70     70 0.03256258 0.19664707 0.02508370 0.001102534 0.03054145 0.0006383898
## 71     71 0.03256563 0.19651853 0.02508570 0.001095553 0.03011035 0.0006273289
## 72     72 0.03255558 0.19701661 0.02508266 0.001102790 0.03014175 0.0006420565
## 73     73 0.03256878 0.19647592 0.02509621 0.001115125 0.03017583 0.0006614937
## 74     74 0.03255894 0.19697037 0.02508608 0.001113529 0.03012313 0.0006578912
## 75     75 0.03255181 0.19727440 0.02508525 0.001095115 0.02976081 0.0006473178
## 76     76 0.03254928 0.19743647 0.02507842 0.001102058 0.02985863 0.0006489150
## 77     77 0.03254645 0.19759285 0.02507547 0.001097109 0.02967815 0.0006440672
## 78     78 0.03256138 0.19699971 0.02508238 0.001092876 0.02949180 0.0006398716
## 79     79 0.03257218 0.19652801 0.02509813 0.001102040 0.02960393 0.0006609104
## 80     80 0.03256535 0.19677803 0.02509489 0.001095838 0.02928694 0.0006479440
## 81     81 0.03257940 0.19624838 0.02510474 0.001095521 0.02946434 0.0006587500
## 82     82 0.03258212 0.19618596 0.02509906 0.001090147 0.02973374 0.0006569935
## 83     83 0.03257952 0.19629837 0.02508958 0.001083789 0.02932779 0.0006470222
## 84     84 0.03259877 0.19543449 0.02510710 0.001065333 0.02856316 0.0006313295
## 85     85 0.03260326 0.19527402 0.02511438 0.001074749 0.02871774 0.0006341643
## 86     86 0.03260756 0.19512491 0.02511320 0.001081647 0.02804570 0.0006526227
## 87     87 0.03260869 0.19502177 0.02511194 0.001074018 0.02766882 0.0006357424
## 88     88 0.03257934 0.19634158 0.02509194 0.001075583 0.02807163 0.0006330695
## 89     89 0.03256771 0.19690981 0.02507899 0.001066566 0.02805645 0.0006162629
## 90     90 0.03256139 0.19724389 0.02507530 0.001051754 0.02798320 0.0006052220
## 91     91 0.03257390 0.19673315 0.02507730 0.001056537 0.02785121 0.0006197841
## 92     92 0.03256643 0.19713883 0.02507519 0.001049962 0.02776768 0.0006179846
## 93     93 0.03257223 0.19698998 0.02507329 0.001034142 0.02782246 0.0006143069
## 94     94 0.03256869 0.19714013 0.02506864 0.001051161 0.02804355 0.0006289716
## 95     95 0.03256645 0.19723374 0.02505932 0.001055644 0.02819029 0.0006399634
## 96     96 0.03255582 0.19770835 0.02504932 0.001051033 0.02780592 0.0006399934
## 97     97 0.03257105 0.19707388 0.02505358 0.001040202 0.02783706 0.0006326067
## 98     98 0.03256043 0.19762494 0.02504500 0.001044906 0.02800458 0.0006329658
## 99     99 0.03256635 0.19741017 0.02503914 0.001045285 0.02802643 0.0006345764
## 100   100 0.03256301 0.19747432 0.02502839 0.001035265 0.02731854 0.0006214056
## 101   101 0.03256769 0.19728754 0.02502977 0.001040779 0.02732606 0.0006160106
## 102   102 0.03258204 0.19676237 0.02504765 0.001045791 0.02740761 0.0006217253
## 103   103 0.03259032 0.19638580 0.02505547 0.001063605 0.02752585 0.0006358492
## 104   104 0.03259013 0.19644598 0.02505591 0.001055495 0.02774818 0.0006312462
## 105   105 0.03259392 0.19629624 0.02506246 0.001050235 0.02761602 0.0006340249
## 106   106 0.03259346 0.19638043 0.02505964 0.001061245 0.02848324 0.0006499882
## 107   107 0.03259731 0.19620788 0.02506104 0.001055439 0.02868784 0.0006491535
## 108   108 0.03259443 0.19641660 0.02506130 0.001065440 0.02927143 0.0006545463
## 109   109 0.03259579 0.19638360 0.02505221 0.001063869 0.02919751 0.0006524737
## 110   110 0.03259726 0.19640394 0.02505880 0.001071337 0.02954749 0.0006555193
## 111   111 0.03260080 0.19632102 0.02505838 0.001076525 0.02971379 0.0006736573
## 112   112 0.03258709 0.19693179 0.02504452 0.001076582 0.02976677 0.0006718096
## 113   113 0.03257337 0.19758762 0.02502825 0.001079025 0.03000422 0.0006659025
## 114   114 0.03256680 0.19788526 0.02502679 0.001077016 0.02996731 0.0006619143
## 115   115 0.03256842 0.19782387 0.02503088 0.001070386 0.03010472 0.0006587471
## 116   116 0.03256572 0.19789861 0.02502902 0.001068234 0.02995742 0.0006546341
## 117   117 0.03256581 0.19789104 0.02503517 0.001062091 0.02973214 0.0006548098
## 118   118 0.03256962 0.19774913 0.02504064 0.001064001 0.02954402 0.0006557233
## 119   119 0.03256530 0.19795116 0.02503579 0.001058096 0.02936706 0.0006477297
## 120   120 0.03256854 0.19783873 0.02504256 0.001046918 0.02936240 0.0006402407
## 121   121 0.03257053 0.19774464 0.02504862 0.001043502 0.02916716 0.0006330462
## 122   122 0.03256147 0.19813419 0.02504103 0.001043611 0.02923671 0.0006348141
## 123   123 0.03256752 0.19790950 0.02504165 0.001031739 0.02921143 0.0006153933
## 124   124 0.03256985 0.19783048 0.02504350 0.001028169 0.02917549 0.0006141822
## 125   125 0.03257470 0.19762978 0.02505065 0.001035215 0.02944951 0.0006206520
## 126   126 0.03257185 0.19774902 0.02504810 0.001025157 0.02909225 0.0006099171
## 127   127 0.03256789 0.19793199 0.02504775 0.001030797 0.02944812 0.0006173975
## 128   128 0.03255495 0.19849624 0.02504189 0.001041160 0.02975462 0.0006275435
## 129   129 0.03254689 0.19885474 0.02503136 0.001045510 0.02975956 0.0006285764
## 130   130 0.03254124 0.19908433 0.02503127 0.001046366 0.02953287 0.0006276257
## 131   131 0.03252780 0.19964649 0.02501500 0.001043731 0.02924524 0.0006274260
## 132   132 0.03252810 0.19960711 0.02501068 0.001042338 0.02892935 0.0006275960
## 133   133 0.03252764 0.19967097 0.02500755 0.001039124 0.02879470 0.0006203663
## 134   134 0.03251918 0.20005129 0.02500024 0.001040318 0.02875404 0.0006213569
## 135   135 0.03251930 0.20002882 0.02500194 0.001039735 0.02830695 0.0006198228
## 136   136 0.03251125 0.20037427 0.02499889 0.001037530 0.02823126 0.0006126105
## 137   137 0.03250704 0.20059952 0.02499500 0.001036773 0.02823638 0.0006121251
## 138   138 0.03251285 0.20034981 0.02500062 0.001044982 0.02835262 0.0006222300
## 139   139 0.03250465 0.20071155 0.02499098 0.001045661 0.02859294 0.0006205320
## 140   140 0.03249890 0.20097513 0.02498661 0.001040657 0.02862525 0.0006181001
## 141   141 0.03250081 0.20092660 0.02499028 0.001041602 0.02880550 0.0006250471
## 142   142 0.03249008 0.20139991 0.02498098 0.001052513 0.02901324 0.0006357751
## 143   143 0.03248652 0.20156144 0.02498017 0.001045031 0.02868918 0.0006285989
## 144   144 0.03248796 0.20150544 0.02498105 0.001043399 0.02857550 0.0006313261
## 145   145 0.03248075 0.20178772 0.02498347 0.001036916 0.02837806 0.0006247206
## 146   146 0.03248350 0.20166458 0.02499081 0.001035209 0.02803601 0.0006243968
## 147   147 0.03247924 0.20186388 0.02498506 0.001037507 0.02813287 0.0006240449
## 148   148 0.03247694 0.20196577 0.02498424 0.001030821 0.02781127 0.0006195992
## 149   149 0.03247470 0.20207125 0.02497980 0.001033645 0.02767566 0.0006201489
## 150   150 0.03247238 0.20217754 0.02497296 0.001032874 0.02733159 0.0006171189
## 151   151 0.03247503 0.20206406 0.02497298 0.001031861 0.02729143 0.0006160999
## 152   152 0.03246887 0.20233018 0.02496653 0.001032963 0.02724557 0.0006138307
## 153   153 0.03246823 0.20238775 0.02496492 0.001037882 0.02750654 0.0006210431
## 154   154 0.03247110 0.20225971 0.02496743 0.001041489 0.02762947 0.0006242485
## 155   155 0.03247025 0.20229274 0.02496895 0.001037034 0.02762607 0.0006217778
## 156   156 0.03246732 0.20243027 0.02496600 0.001044002 0.02786182 0.0006233951
## 157   157 0.03246675 0.20244585 0.02496398 0.001039105 0.02778159 0.0006193942
## 158   158 0.03246989 0.20228879 0.02497063 0.001039978 0.02763638 0.0006241771
## 159   159 0.03246798 0.20238523 0.02496872 0.001040278 0.02787452 0.0006275859
## 160   160 0.03246699 0.20242881 0.02496745 0.001037409 0.02769042 0.0006270567
## 161   161 0.03246565 0.20246760 0.02496350 0.001030303 0.02753885 0.0006188874
## 162   162 0.03246866 0.20233348 0.02496534 0.001027694 0.02746149 0.0006180339
## 163   163 0.03246917 0.20231577 0.02496686 0.001028064 0.02751377 0.0006199296
## 164   164 0.03246766 0.20236003 0.02496410 0.001023622 0.02752972 0.0006136923
## 165   165 0.03246174 0.20261088 0.02495939 0.001022985 0.02754941 0.0006115800
## 166   166 0.03246180 0.20258975 0.02495916 0.001022469 0.02745196 0.0006101929
## 167   167 0.03246264 0.20255252 0.02496018 0.001025104 0.02739575 0.0006125458
## 168   168 0.03246709 0.20237992 0.02496434 0.001024210 0.02741399 0.0006102726
## 169   169 0.03246893 0.20230521 0.02496476 0.001025024 0.02743181 0.0006069247
## 170   170 0.03246931 0.20227643 0.02496608 0.001022759 0.02734819 0.0006046910
## 171   171 0.03247017 0.20223287 0.02496786 0.001021818 0.02726144 0.0006030136
## 172   172 0.03247008 0.20224760 0.02496794 0.001021648 0.02736475 0.0006046915
## 173   173 0.03247132 0.20218821 0.02496974 0.001020320 0.02732321 0.0006023293
## 174   174 0.03247099 0.20220905 0.02497049 0.001018765 0.02743525 0.0005992589
## 175   175 0.03247034 0.20224800 0.02497051 0.001021682 0.02755310 0.0006013727
## 176   176 0.03246996 0.20226472 0.02497022 0.001021454 0.02745719 0.0005999913
## 177   177 0.03247152 0.20219542 0.02497151 0.001021322 0.02747919 0.0006007170
## 178   178 0.03247114 0.20221044 0.02497049 0.001021037 0.02748226 0.0005986215
## 179   179 0.03247160 0.20219340 0.02497094 0.001023406 0.02744409 0.0005982308
## 180   180 0.03247056 0.20223176 0.02496903 0.001023439 0.02734006 0.0005970432
## 181   181 0.03247133 0.20219966 0.02496923 0.001023363 0.02738263 0.0005980545
## 182   182 0.03247106 0.20221489 0.02496841 0.001023940 0.02748334 0.0005977092
## 183   183 0.03247104 0.20221116 0.02496862 0.001022319 0.02744137 0.0005976230
## 184   184 0.03247220 0.20216074 0.02497008 0.001021979 0.02737803 0.0005978982
## 185   185 0.03247189 0.20217386 0.02497027 0.001021472 0.02733071 0.0005975205
## 186   186 0.03247181 0.20218233 0.02497044 0.001023055 0.02734702 0.0005993725
## 187   187 0.03247160 0.20219051 0.02497039 0.001023325 0.02734271 0.0005999549
## 188   188 0.03247241 0.20215773 0.02497135 0.001023839 0.02738431 0.0006009079
## 189   189 0.03247227 0.20216222 0.02497123 0.001023517 0.02738704 0.0006007021
## 190   190 0.03247243 0.20215757 0.02497159 0.001023711 0.02739854 0.0006008892
## 191   191 0.03247213 0.20217054 0.02497127 0.001023876 0.02741584 0.0006005182
## 192   192 0.03247237 0.20216046 0.02497129 0.001023763 0.02741608 0.0006007176
## 193   193 0.03247255 0.20215266 0.02497147 0.001023653 0.02741320 0.0006005485
## [1] "Best Model"
##     nvmax
## 165   165

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.0966743524  2.095840e+00  2.097508e+00
## PC1         -0.0009049676 -1.083844e-03 -7.260907e-04
## PC2          0.0012140169  1.016690e-03  1.411344e-03
## PC3         -0.0002795553 -4.959098e-04 -6.320073e-05
## PC4          0.0001500536 -7.038329e-05  3.704906e-04
## PC5          0.0006088768  3.883815e-04  8.293721e-04
## PC6          0.0001125261 -1.117121e-04  3.367644e-04
## PC7         -0.0004976586 -7.241999e-04 -2.711173e-04
## PC8         -0.0003141728 -5.440046e-04 -8.434102e-05
## PC9         -0.0002203596 -4.567376e-04  1.601842e-05
## PC10         0.0002504655  1.049448e-05  4.904365e-04
## PC11        -0.0014282252 -1.673753e-03 -1.182698e-03
## PC12         0.0003757187  1.288141e-04  6.226233e-04
## PC13         0.0004538646  2.016142e-04  7.061150e-04
## PC14         0.0015449000  1.289707e-03  1.800093e-03
## PC15        -0.0004179924 -6.761995e-04 -1.597853e-04
## PC16         0.0007435102  4.771006e-04  1.009920e-03
## PC17        -0.0002027179 -4.760350e-04  7.059915e-05
## PC18         0.0002628240 -1.655793e-05  5.422059e-04
## PC19        -0.0002062347 -4.949100e-04  8.244059e-05
## PC20         0.0009425082  6.421689e-04  1.242847e-03
## PC21        -0.0010153050 -1.325855e-03 -7.047552e-04
## PC22         0.0041047459  3.780751e-03  4.428741e-03
## PC23        -0.0005419278 -1.188404e-03  1.045486e-04
## PC24        -0.0011782153 -1.892608e-03 -4.638229e-04
## PC25         0.0006344173 -8.273767e-05  1.351572e-03
## PC26        -0.0003120001 -1.024234e-03  4.002338e-04
## PC27        -0.0004700044 -1.186125e-03  2.461158e-04
## PC28         0.0006790259 -4.115660e-05  1.399208e-03
## PC29        -0.0006818032 -1.401244e-03  3.763804e-05
## PC30         0.0003592277 -3.696720e-04  1.088127e-03
## PC32         0.0011994645  4.679920e-04  1.930937e-03
## PC33         0.0002997233 -4.193797e-04  1.018826e-03
## PC34         0.0002616703 -4.722802e-04  9.956209e-04
## PC36         0.0005114395 -2.182976e-04  1.241177e-03
## PC37        -0.0003355461 -1.070123e-03  3.990308e-04
## PC38         0.0004344763 -3.028881e-04  1.171841e-03
## PC39         0.0003451893 -3.939132e-04  1.084292e-03
## PC40         0.0003894461 -3.487638e-04  1.127656e-03
## PC41        -0.0006644526 -1.403295e-03  7.439033e-05
## PC42        -0.0004883539 -1.230427e-03  2.537189e-04
## PC43         0.0009571772  2.217230e-04  1.692631e-03
## PC44        -0.0003387228 -1.077106e-03  3.996600e-04
## PC45         0.0014007629  6.585599e-04  2.142966e-03
## PC46         0.0016938942  9.426549e-04  2.445133e-03
## PC47         0.0003118516 -4.414656e-04  1.065169e-03
## PC48        -0.0003526961 -1.100760e-03  3.953680e-04
## PC49         0.0003890067 -3.598131e-04  1.137827e-03
## PC50        -0.0006686539 -1.417545e-03  8.023759e-05
## PC51         0.0004486878 -3.072433e-04  1.204619e-03
## PC52         0.0001752847 -5.779587e-04  9.285281e-04
## PC53        -0.0003592069 -1.117007e-03  3.985928e-04
## PC55         0.0002028839 -5.612425e-04  9.670104e-04
## PC56         0.0001686360 -5.888921e-04  9.261642e-04
## PC57         0.0004316819 -3.274207e-04  1.190784e-03
## PC58        -0.0004315006 -1.197407e-03  3.344054e-04
## PC59         0.0014939212  7.322477e-04  2.255595e-03
## PC60        -0.0010186293 -1.780994e-03 -2.562647e-04
## PC61        -0.0005928354 -1.360066e-03  1.743947e-04
## PC62        -0.0002034974 -9.701223e-04  5.631275e-04
## PC63         0.0004843173 -2.813009e-04  1.249935e-03
## PC64        -0.0001758589 -9.436420e-04  5.919241e-04
## PC65         0.0009866345  2.196406e-04  1.753628e-03
## PC66         0.0004330656 -3.434311e-04  1.209562e-03
## PC67         0.0006356282 -1.331155e-04  1.404372e-03
## PC68         0.0007790931  7.334388e-06  1.550852e-03
## PC69        -0.0017464448 -2.522593e-03 -9.702964e-04
## PC70         0.0002374992 -5.377967e-04  1.012795e-03
## PC71        -0.0006290740 -1.403397e-03  1.452490e-04
## PC72         0.0005181758 -2.626325e-04  1.298984e-03
## PC73         0.0004983506 -2.793461e-04  1.276047e-03
## PC74        -0.0001663813 -9.491878e-04  6.164253e-04
## PC75         0.0003121803 -4.708293e-04  1.095190e-03
## PC76         0.0012720467  4.906518e-04  2.053442e-03
## PC78        -0.0003994344 -1.184636e-03  3.857668e-04
## PC79         0.0007698287 -1.564983e-05  1.555307e-03
## PC80        -0.0006520201 -1.435448e-03  1.314077e-04
## PC82        -0.0009789431 -1.759943e-03 -1.979436e-04
## PC83         0.0010988317  3.109389e-04  1.886725e-03
## PC85         0.0001660642 -6.255805e-04  9.577089e-04
## PC86         0.0003496346 -4.428536e-04  1.142123e-03
## PC87         0.0004451030 -3.489412e-04  1.239147e-03
## PC88         0.0002318335 -5.562157e-04  1.019883e-03
## PC89         0.0003808273 -4.149632e-04  1.176618e-03
## PC90        -0.0020087691 -2.808799e-03 -1.208740e-03
## PC91         0.0006604596 -1.347978e-04  1.455717e-03
## PC92        -0.0005247861 -1.322298e-03  2.727257e-04
## PC93         0.0002036291 -5.947062e-04  1.001964e-03
## PC94         0.0005510716 -2.462237e-04  1.348367e-03
## PC95        -0.0002462988 -1.047223e-03  5.546255e-04
## PC96        -0.0006872921 -1.491586e-03  1.170019e-04
## PC98         0.0005269882 -2.765694e-04  1.330546e-03
## PC101       -0.0013354257 -2.135917e-03 -5.349344e-04
## PC102        0.0015452159  7.388632e-04  2.351569e-03
## PC104        0.0007226484 -8.558940e-05  1.530886e-03
## PC105        0.0001688957 -6.385070e-04  9.762984e-04
## PC106        0.0013249636  5.150856e-04  2.134842e-03
## PC107        0.0001856730 -6.255479e-04  9.968939e-04
## PC108       -0.0010089374 -1.823089e-03 -1.947857e-04
## PC111       -0.0006854404 -1.503617e-03  1.327362e-04
## PC112        0.0007218671 -1.006344e-04  1.544369e-03
## PC114       -0.0004823893 -1.306658e-03  3.418798e-04
## PC115       -0.0007510769 -1.575450e-03  7.329638e-05
## PC117       -0.0017330605 -2.551275e-03 -9.148456e-04
## PC118       -0.0010582700 -1.884554e-03 -2.319859e-04
## PC119       -0.0010043509 -1.836100e-03 -1.726014e-04
## PC120       -0.0006520716 -1.478225e-03  1.740823e-04
## PC121        0.0007367603 -9.451144e-05  1.568032e-03
## PC123       -0.0007913389 -1.628092e-03  4.541387e-05
## PC124       -0.0002753178 -1.105457e-03  5.548218e-04
## PC125        0.0003233625 -5.102690e-04  1.156994e-03
## PC126        0.0009520484  1.201403e-04  1.783956e-03
## PC127        0.0013921456  5.586443e-04  2.225647e-03
## PC128       -0.0008839796 -1.721751e-03 -4.620797e-05
## PC129       -0.0008380986 -1.681385e-03  5.187439e-06
## PC131        0.0010265727  1.948022e-04  1.858343e-03
## PC132        0.0015360116  6.961791e-04  2.375844e-03
## PC133       -0.0002152622 -1.049469e-03  6.189442e-04
## PC134       -0.0001882865 -1.026806e-03  6.502335e-04
## PC135       -0.0005054907 -1.351296e-03  3.403150e-04
## PC137        0.0003858280 -4.626697e-04  1.234326e-03
## PC138        0.0011356535  2.962514e-04  1.975056e-03
## PC139       -0.0013163614 -2.161652e-03 -4.710707e-04
## PC142        0.0005530691 -2.991333e-04  1.405272e-03
## PC143       -0.0010254284 -1.880288e-03 -1.705686e-04
## PC145        0.0005098362 -3.457919e-04  1.365464e-03
## PC146       -0.0003701883 -1.222235e-03  4.818584e-04
## PC147        0.0009946295  1.389404e-04  1.850319e-03
## PC148        0.0007943120 -6.512855e-05  1.653752e-03
## PC149       -0.0003687632 -1.224515e-03  4.869885e-04
## PC150        0.0003343696 -5.296961e-04  1.198435e-03
## PC151        0.0006582715 -2.057141e-04  1.522257e-03
## PC153       -0.0002331037 -1.100823e-03  6.346156e-04
## PC154        0.0005585424 -3.036037e-04  1.420688e-03
## PC155        0.0006758915 -1.910115e-04  1.542795e-03
## PC156       -0.0013557997 -2.230491e-03 -4.811087e-04
## PC157        0.0011142332  2.427858e-04  1.985681e-03
## PC158       -0.0002926967 -1.163527e-03  5.781336e-04
## PC159       -0.0003955034 -1.261663e-03  4.706565e-04
## PC160        0.0014287912  5.627705e-04  2.294812e-03
## PC161       -0.0001810139 -1.054639e-03  6.926109e-04
## PC162        0.0004204782 -4.506401e-04  1.291596e-03
## PC163        0.0005048545 -3.670057e-04  1.376715e-03
## PC164       -0.0003702884 -1.248583e-03  5.080067e-04
## PC166       -0.0004966723 -1.374447e-03  3.811023e-04
## PC167        0.0004720439 -4.037883e-04  1.347876e-03
## PC170       -0.0007165705 -1.595553e-03  1.624124e-04
## PC171        0.0006382425 -2.376219e-04  1.514107e-03
## PC172        0.0005221531 -3.621022e-04  1.406408e-03
## PC173       -0.0011111284 -1.997185e-03 -2.250724e-04
## PC174        0.0009658581  8.191380e-05  1.849802e-03
## PC175        0.0007042264 -1.852683e-04  1.593721e-03
## PC179       -0.0015329190 -2.427945e-03 -6.378927e-04
## PC180        0.0010984029  1.984913e-04  1.998315e-03
## PC181       -0.0008781256 -1.772668e-03  1.641666e-05
## PC182       -0.0002277114 -1.126785e-03  6.713624e-04
## PC183       -0.0007057617 -1.602555e-03  1.910313e-04
## PC184       -0.0005281246 -1.429204e-03  3.729553e-04
## PC186        0.0009900218  8.573069e-05  1.894313e-03
## PC187        0.0011887661  2.881999e-04  2.089332e-03
## PC188       -0.0007435318 -1.647150e-03  1.600868e-04
## PC189        0.0009600851  4.714479e-05  1.873025e-03
## PC190       -0.0003711605 -1.274941e-03  5.326201e-04
## PC191       -0.0004190125 -1.322567e-03  4.845419e-04
## PC192       -0.0018522366 -2.762783e-03 -9.416901e-04
## PC193        0.0002352162 -6.798789e-04  1.150311e-03

Test

# Evaluate the forward-selection (leapForward) model on the held-out test set.
# `test.model()` is a project helper: it prints a summary of the predicted
# values and the test MSE (see output below). `transformation = t` passes a
# back-transformation for the predictions — NOTE(review): presumably `t` is a
# transform object/function defined earlier in the file, not base::t; confirm.
if (algo.forward.caret) {  # flag is logical per params; `== TRUE` was redundant
  test.model(model = model.forward, test = data.test,
             method = "leapForward", subopt = NULL,
             formula = formula,
             feature.names = feature.names, label.names = label.names,
             id = id,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.039   2.083   2.098   2.097   2.110   2.154 
## [1] "leapForward  Test MSE: 0.00102836422121638"

Backward Elimination with CV

Train

# Train a backward-elimination model (caret method "leapBackward") with
# cross-validation. `train.caret.glmselect()` is a project helper wrapping
# caret::train; it returns a list with the fitted model (`$model`) and an
# identifier (`$id`) that the matching test chunk passes to `test.model()`.
if (algo.backward.caret) {  # flag is logical per params; `== TRUE` was redundant
  set.seed(1)  # fix RNG so CV fold assignment is reproducible across runs
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "leapBackward",
                                    feature.names = feature.names)
  # `<-` (not `=`) for assignment, per tidyverse style
  model.backward <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 165 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.03466409 0.08593606 0.02690290 0.001099875 0.02279859 0.0006370046
## 2       2 0.03444082 0.09774700 0.02676647 0.001072722 0.02436339 0.0005267890
## 3       3 0.03407079 0.11785519 0.02643370 0.001163325 0.03529461 0.0006039663
## 4       4 0.03360175 0.14159740 0.02603125 0.001128829 0.03712006 0.0006157767
## 5       5 0.03334636 0.15519820 0.02585069 0.001161894 0.04142363 0.0006092038
## 6       6 0.03329792 0.15764518 0.02580989 0.001164457 0.04105992 0.0006217137
## 7       7 0.03321062 0.16192922 0.02580111 0.001118805 0.04024003 0.0006483500
## 8       8 0.03312435 0.16641525 0.02571374 0.001182956 0.04135877 0.0006559573
## 9       9 0.03307574 0.16922910 0.02568986 0.001244109 0.04504879 0.0007230730
## 10     10 0.03303978 0.17078808 0.02565820 0.001240144 0.04278899 0.0007128830
## 11     11 0.03300951 0.17214185 0.02560873 0.001182527 0.04066502 0.0006407614
## 12     12 0.03297259 0.17391369 0.02556379 0.001151144 0.04045896 0.0005755534
## 13     13 0.03290594 0.17734479 0.02553116 0.001187443 0.04207657 0.0006026256
## 14     14 0.03288937 0.17815481 0.02550390 0.001186010 0.04148991 0.0006088042
## 15     15 0.03289863 0.17779109 0.02549328 0.001181655 0.04154013 0.0005901351
## 16     16 0.03290108 0.17770618 0.02551469 0.001177906 0.04159493 0.0006027152
## 17     17 0.03291394 0.17710046 0.02551820 0.001175968 0.03925230 0.0005886936
## 18     18 0.03290527 0.17748093 0.02551383 0.001175942 0.03807660 0.0006100222
## 19     19 0.03290986 0.17743033 0.02552582 0.001190951 0.03969890 0.0006145930
## 20     20 0.03289854 0.17787885 0.02547556 0.001162280 0.03714049 0.0006057325
## 21     21 0.03291432 0.17732975 0.02547539 0.001168697 0.03664888 0.0006219098
## 22     22 0.03294549 0.17604413 0.02549380 0.001194372 0.03688412 0.0006104124
## 23     23 0.03293628 0.17674048 0.02547452 0.001209655 0.03764623 0.0006272876
## 24     24 0.03292027 0.17754091 0.02546476 0.001231337 0.03800989 0.0006565158
## 25     25 0.03294866 0.17630971 0.02550823 0.001208678 0.03719119 0.0006546398
## 26     26 0.03290973 0.17806409 0.02548409 0.001204610 0.03653682 0.0006528609
## 27     27 0.03288179 0.17944590 0.02544581 0.001180425 0.03581064 0.0006400661
## 28     28 0.03287044 0.18015100 0.02543940 0.001192554 0.03593316 0.0006496120
## 29     29 0.03287131 0.18034661 0.02542594 0.001196513 0.03687422 0.0006608440
## 30     30 0.03284611 0.18152416 0.02539431 0.001183288 0.03732892 0.0006591165
## 31     31 0.03286035 0.18097189 0.02541506 0.001203383 0.03769555 0.0006720774
## 32     32 0.03287148 0.18055918 0.02543027 0.001202684 0.03672015 0.0006558045
## 33     33 0.03286444 0.18087282 0.02543200 0.001177290 0.03530578 0.0006492820
## 34     34 0.03283862 0.18224920 0.02539738 0.001185464 0.03559099 0.0006674890
## 35     35 0.03278876 0.18461549 0.02534949 0.001206563 0.03672987 0.0006762822
## 36     36 0.03280163 0.18412308 0.02535033 0.001197585 0.03608882 0.0006791103
## 37     37 0.03277155 0.18548928 0.02533325 0.001177708 0.03594142 0.0006724391
## 38     38 0.03274453 0.18687692 0.02530497 0.001179260 0.03636114 0.0006871403
## 39     39 0.03270068 0.18905392 0.02525529 0.001227603 0.03708889 0.0007407069
## 40     40 0.03266170 0.19082541 0.02521858 0.001201796 0.03556663 0.0007026219
## 41     41 0.03264771 0.19156584 0.02521916 0.001197933 0.03565503 0.0007041501
## 42     42 0.03264541 0.19165742 0.02521095 0.001210882 0.03489479 0.0007301137
## 43     43 0.03266293 0.19084759 0.02522772 0.001208751 0.03383334 0.0007344629
## 44     44 0.03263585 0.19217594 0.02519435 0.001204418 0.03387020 0.0007364569
## 45     45 0.03262653 0.19269199 0.02518192 0.001239895 0.03427572 0.0007562008
## 46     46 0.03259441 0.19422170 0.02515667 0.001247348 0.03399717 0.0007632970
## 47     47 0.03260068 0.19409069 0.02515597 0.001257326 0.03374400 0.0007729498
## 48     48 0.03259143 0.19445218 0.02515487 0.001243201 0.03293494 0.0007662776
## 49     49 0.03258654 0.19466559 0.02514511 0.001227054 0.03227825 0.0007461070
## 50     50 0.03258122 0.19496342 0.02514312 0.001203495 0.03138270 0.0007384793
## 51     51 0.03258135 0.19498368 0.02514561 0.001208911 0.03093455 0.0007297054
## 52     52 0.03257773 0.19516069 0.02513577 0.001195345 0.03114276 0.0007188847
## 53     53 0.03255960 0.19602272 0.02510879 0.001191656 0.03074620 0.0007114939
## 54     54 0.03256057 0.19609488 0.02509477 0.001206645 0.03142622 0.0007371961
## 55     55 0.03257504 0.19542696 0.02509104 0.001186708 0.03057189 0.0007199286
## 56     56 0.03258198 0.19509606 0.02508762 0.001167506 0.02959148 0.0007038287
## 57     57 0.03256769 0.19593030 0.02507026 0.001179976 0.03087632 0.0007136187
## 58     58 0.03256565 0.19613047 0.02508095 0.001197473 0.03155944 0.0007170993
## 59     59 0.03256722 0.19620490 0.02507766 0.001217425 0.03250321 0.0007263410
## 60     60 0.03256357 0.19629816 0.02508004 0.001189347 0.03157874 0.0007006144
## 61     61 0.03255962 0.19652412 0.02508749 0.001201864 0.03124339 0.0007110340
## 62     62 0.03257423 0.19585968 0.02508636 0.001195130 0.03108471 0.0007141600
## 63     63 0.03258404 0.19545620 0.02508389 0.001176222 0.03121322 0.0006935737
## 64     64 0.03257839 0.19577890 0.02508127 0.001177021 0.03114526 0.0006931972
## 65     65 0.03257756 0.19581735 0.02508056 0.001173401 0.03098498 0.0006893297
## 66     66 0.03258869 0.19530634 0.02509753 0.001140058 0.02965248 0.0006635233
## 67     67 0.03256646 0.19626134 0.02507898 0.001117110 0.02849669 0.0006340434
## 68     68 0.03255343 0.19691128 0.02507176 0.001109497 0.02938926 0.0006445680
## 69     69 0.03256737 0.19634751 0.02508579 0.001104840 0.03004726 0.0006429183
## 70     70 0.03255824 0.19684368 0.02509131 0.001101383 0.03044477 0.0006407687
## 71     71 0.03256110 0.19674033 0.02509514 0.001094042 0.03012048 0.0006304117
## 72     72 0.03254576 0.19741448 0.02508033 0.001100201 0.02982364 0.0006411317
## 73     73 0.03255488 0.19704900 0.02509553 0.001117268 0.03028940 0.0006569577
## 74     74 0.03254935 0.19742379 0.02509158 0.001119951 0.03052451 0.0006608270
## 75     75 0.03254452 0.19761294 0.02508447 0.001100104 0.03007702 0.0006481972
## 76     76 0.03254718 0.19753065 0.02508002 0.001103448 0.02994345 0.0006470818
## 77     77 0.03253706 0.19797141 0.02507307 0.001098316 0.02973806 0.0006370409
## 78     78 0.03255593 0.19719507 0.02509074 0.001094420 0.02960665 0.0006345276
## 79     79 0.03256750 0.19668276 0.02509715 0.001100461 0.02950819 0.0006604717
## 80     80 0.03256502 0.19677971 0.02509572 0.001096330 0.02939483 0.0006516649
## 81     81 0.03257930 0.19623676 0.02510267 0.001089863 0.02927428 0.0006582889
## 82     82 0.03258386 0.19609264 0.02510113 0.001085970 0.02961586 0.0006578117
## 83     83 0.03259338 0.19570270 0.02510023 0.001069510 0.02922196 0.0006409973
## 84     84 0.03259120 0.19578823 0.02509764 0.001064527 0.02920549 0.0006278386
## 85     85 0.03261202 0.19486282 0.02510867 0.001073713 0.02844418 0.0006342299
## 86     86 0.03261358 0.19484785 0.02511854 0.001076796 0.02805120 0.0006483060
## 87     87 0.03259702 0.19559092 0.02510269 0.001073105 0.02810064 0.0006352993
## 88     88 0.03256803 0.19689922 0.02508660 0.001066001 0.02825797 0.0006195906
## 89     89 0.03256736 0.19695217 0.02508163 0.001065626 0.02802362 0.0006176518
## 90     90 0.03256297 0.19721330 0.02508150 0.001051334 0.02798823 0.0006083469
## 91     91 0.03257145 0.19686044 0.02508332 0.001056171 0.02779070 0.0006209485
## 92     92 0.03256734 0.19710270 0.02507643 0.001048782 0.02762908 0.0006163546
## 93     93 0.03257330 0.19694692 0.02507419 0.001032396 0.02764995 0.0006126340
## 94     94 0.03257156 0.19702104 0.02507014 0.001047608 0.02803663 0.0006280578
## 95     95 0.03257380 0.19691055 0.02506740 0.001046725 0.02818312 0.0006352863
## 96     96 0.03256322 0.19738193 0.02505740 0.001041936 0.02779386 0.0006347903
## 97     97 0.03257105 0.19707388 0.02505358 0.001040202 0.02783706 0.0006326067
## 98     98 0.03256183 0.19757014 0.02504607 0.001048443 0.02810340 0.0006352571
## 99     99 0.03256105 0.19762515 0.02503788 0.001048698 0.02819451 0.0006307550
## 100   100 0.03255621 0.19777884 0.02502355 0.001040621 0.02759026 0.0006226720
## 101   101 0.03256605 0.19734365 0.02503087 0.001041856 0.02732003 0.0006152447
## 102   102 0.03257810 0.19691091 0.02504285 0.001048413 0.02739315 0.0006250705
## 103   103 0.03258871 0.19647638 0.02505169 0.001063812 0.02744519 0.0006326027
## 104   104 0.03259177 0.19640613 0.02505633 0.001056479 0.02779627 0.0006318446
## 105   105 0.03259972 0.19609331 0.02506409 0.001053927 0.02787179 0.0006363950
## 106   106 0.03259346 0.19638043 0.02505964 0.001061245 0.02848324 0.0006499882
## 107   107 0.03259731 0.19620788 0.02506104 0.001055439 0.02868784 0.0006491535
## 108   108 0.03259443 0.19641660 0.02506130 0.001065440 0.02927143 0.0006545463
## 109   109 0.03259579 0.19638360 0.02505221 0.001063869 0.02919751 0.0006524737
## 110   110 0.03259726 0.19640394 0.02505880 0.001071337 0.02954749 0.0006555193
## 111   111 0.03260097 0.19629313 0.02506177 0.001076556 0.02969670 0.0006688246
## 112   112 0.03259469 0.19656438 0.02505700 0.001069007 0.02955977 0.0006603873
## 113   113 0.03258183 0.19719074 0.02504296 0.001071521 0.02975140 0.0006538304
## 114   114 0.03257508 0.19751820 0.02503979 0.001069395 0.02971780 0.0006538662
## 115   115 0.03257553 0.19750859 0.02503892 0.001063611 0.02988684 0.0006548687
## 116   116 0.03256973 0.19770189 0.02503499 0.001065788 0.03002326 0.0006583484
## 117   117 0.03256634 0.19787467 0.02503721 0.001063454 0.02976176 0.0006590145
## 118   118 0.03257024 0.19770856 0.02504172 0.001064299 0.02957566 0.0006563693
## 119   119 0.03256303 0.19801452 0.02503209 0.001056974 0.02931644 0.0006454175
## 120   120 0.03256570 0.19794068 0.02503814 0.001045492 0.02928000 0.0006373570
## 121   121 0.03256857 0.19784469 0.02504314 0.001044243 0.02916220 0.0006346715
## 122   122 0.03255844 0.19827617 0.02503608 0.001043861 0.02930714 0.0006364734
## 123   123 0.03256435 0.19803491 0.02503959 0.001034970 0.02900398 0.0006178213
## 124   124 0.03256985 0.19783048 0.02504350 0.001028169 0.02917549 0.0006141822
## 125   125 0.03257470 0.19762978 0.02505065 0.001035215 0.02944951 0.0006206520
## 126   126 0.03257185 0.19774902 0.02504810 0.001025157 0.02909225 0.0006099171
## 127   127 0.03256789 0.19793199 0.02504775 0.001030797 0.02944812 0.0006173975
## 128   128 0.03255495 0.19849624 0.02504189 0.001041160 0.02975462 0.0006275435
## 129   129 0.03254689 0.19885474 0.02503136 0.001045510 0.02975956 0.0006285764
## 130   130 0.03254124 0.19908433 0.02503127 0.001046366 0.02953287 0.0006276257
## 131   131 0.03252780 0.19964649 0.02501500 0.001043731 0.02924524 0.0006274260
## 132   132 0.03252810 0.19960711 0.02501068 0.001042338 0.02892935 0.0006275960
## 133   133 0.03252305 0.19987287 0.02500438 0.001041951 0.02875767 0.0006223761
## 134   134 0.03251718 0.20014091 0.02500007 0.001040271 0.02873565 0.0006213355
## 135   135 0.03251567 0.20020409 0.02499864 0.001042562 0.02828569 0.0006218822
## 136   136 0.03251125 0.20037427 0.02499889 0.001037530 0.02823126 0.0006126105
## 137   137 0.03250704 0.20059952 0.02499500 0.001036773 0.02823638 0.0006121251
## 138   138 0.03251285 0.20034981 0.02500062 0.001044982 0.02835262 0.0006222300
## 139   139 0.03250465 0.20071155 0.02499098 0.001045661 0.02859294 0.0006205320
## 140   140 0.03249890 0.20097513 0.02498661 0.001040657 0.02862525 0.0006181001
## 141   141 0.03250226 0.20087418 0.02499139 0.001040730 0.02881539 0.0006243487
## 142   142 0.03249150 0.20134836 0.02498214 0.001051616 0.02902012 0.0006349900
## 143   143 0.03248652 0.20156144 0.02498017 0.001045031 0.02868918 0.0006285989
## 144   144 0.03248796 0.20150544 0.02498105 0.001043399 0.02857550 0.0006313261
## 145   145 0.03248075 0.20178772 0.02498347 0.001036916 0.02837806 0.0006247206
## 146   146 0.03248350 0.20166458 0.02499081 0.001035209 0.02803601 0.0006243968
## 147   147 0.03247924 0.20186388 0.02498506 0.001037507 0.02813287 0.0006240449
## 148   148 0.03247694 0.20196577 0.02498424 0.001030821 0.02781127 0.0006195992
## 149   149 0.03247374 0.20210080 0.02497894 0.001031210 0.02762254 0.0006184684
## 150   150 0.03247221 0.20218567 0.02497489 0.001032464 0.02731686 0.0006208865
## 151   151 0.03247453 0.20208420 0.02497336 0.001031498 0.02726302 0.0006166626
## 152   152 0.03246866 0.20233384 0.02496708 0.001032259 0.02721875 0.0006142642
## 153   153 0.03246853 0.20237076 0.02496511 0.001037548 0.02750892 0.0006209260
## 154   154 0.03247110 0.20225971 0.02496743 0.001041489 0.02762947 0.0006242485
## 155   155 0.03247025 0.20229274 0.02496895 0.001037034 0.02762607 0.0006217778
## 156   156 0.03246732 0.20243027 0.02496600 0.001044002 0.02786182 0.0006233951
## 157   157 0.03246675 0.20244585 0.02496398 0.001039105 0.02778159 0.0006193942
## 158   158 0.03246989 0.20228879 0.02497063 0.001039978 0.02763638 0.0006241771
## 159   159 0.03246798 0.20238523 0.02496872 0.001040278 0.02787452 0.0006275859
## 160   160 0.03246699 0.20242881 0.02496745 0.001037409 0.02769042 0.0006270567
## 161   161 0.03246565 0.20246760 0.02496350 0.001030303 0.02753885 0.0006188874
## 162   162 0.03246734 0.20239074 0.02496494 0.001027078 0.02741519 0.0006177684
## 163   163 0.03246684 0.20241051 0.02496523 0.001027001 0.02743835 0.0006188574
## 164   164 0.03246435 0.20250464 0.02496176 0.001022122 0.02741572 0.0006121763
## 165   165 0.03245940 0.20270642 0.02495768 0.001021929 0.02747435 0.0006104868
## 166   166 0.03246180 0.20258975 0.02495916 0.001022469 0.02745196 0.0006101929
## 167   167 0.03246264 0.20255252 0.02496018 0.001025104 0.02739575 0.0006125458
## 168   168 0.03246709 0.20237992 0.02496434 0.001024210 0.02741399 0.0006102726
## 169   169 0.03246893 0.20230521 0.02496476 0.001025024 0.02743181 0.0006069247
## 170   170 0.03246931 0.20227643 0.02496608 0.001022759 0.02734819 0.0006046910
## 171   171 0.03246986 0.20224827 0.02496788 0.001021873 0.02725907 0.0006030024
## 172   172 0.03246977 0.20226310 0.02496797 0.001021703 0.02736225 0.0006046794
## 173   173 0.03247132 0.20218821 0.02496974 0.001020320 0.02732321 0.0006023293
## 174   174 0.03247099 0.20220905 0.02497049 0.001018765 0.02743525 0.0005992589
## 175   175 0.03247034 0.20224800 0.02497051 0.001021682 0.02755310 0.0006013727
## 176   176 0.03246996 0.20226472 0.02497022 0.001021454 0.02745719 0.0005999913
## 177   177 0.03247152 0.20219542 0.02497151 0.001021322 0.02747919 0.0006007170
## 178   178 0.03247114 0.20221044 0.02497049 0.001021037 0.02748226 0.0005986215
## 179   179 0.03247160 0.20219340 0.02497094 0.001023406 0.02744409 0.0005982308
## 180   180 0.03247056 0.20223176 0.02496903 0.001023439 0.02734006 0.0005970432
## 181   181 0.03247133 0.20219966 0.02496923 0.001023363 0.02738263 0.0005980545
## 182   182 0.03247106 0.20221489 0.02496841 0.001023940 0.02748334 0.0005977092
## 183   183 0.03247104 0.20221116 0.02496862 0.001022319 0.02744137 0.0005976230
## 184   184 0.03247220 0.20216074 0.02497008 0.001021979 0.02737803 0.0005978982
## 185   185 0.03247189 0.20217386 0.02497027 0.001021472 0.02733071 0.0005975205
## 186   186 0.03247181 0.20218233 0.02497044 0.001023055 0.02734702 0.0005993725
## 187   187 0.03247160 0.20219051 0.02497039 0.001023325 0.02734271 0.0005999549
## 188   188 0.03247241 0.20215773 0.02497135 0.001023839 0.02738431 0.0006009079
## 189   189 0.03247236 0.20215773 0.02497109 0.001023735 0.02739509 0.0006004119
## 190   190 0.03247252 0.20215298 0.02497145 0.001023934 0.02740676 0.0006005976
## 191   191 0.03247213 0.20217054 0.02497127 0.001023876 0.02741584 0.0006005182
## 192   192 0.03247237 0.20216046 0.02497129 0.001023763 0.02741608 0.0006007176
## 193   193 0.03247255 0.20215266 0.02497147 0.001023653 0.02741320 0.0006005485
## [1] "Best Model"
##     nvmax
## 165   165

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.0966743524  2.095840e+00  2.097508e+00
## PC1         -0.0009049676 -1.083844e-03 -7.260907e-04
## PC2          0.0012140169  1.016690e-03  1.411344e-03
## PC3         -0.0002795553 -4.959098e-04 -6.320073e-05
## PC4          0.0001500536 -7.038329e-05  3.704906e-04
## PC5          0.0006088768  3.883815e-04  8.293721e-04
## PC6          0.0001125261 -1.117121e-04  3.367644e-04
## PC7         -0.0004976586 -7.241999e-04 -2.711173e-04
## PC8         -0.0003141728 -5.440046e-04 -8.434102e-05
## PC9         -0.0002203596 -4.567376e-04  1.601842e-05
## PC10         0.0002504655  1.049448e-05  4.904365e-04
## PC11        -0.0014282252 -1.673753e-03 -1.182698e-03
## PC12         0.0003757187  1.288141e-04  6.226233e-04
## PC13         0.0004538646  2.016142e-04  7.061150e-04
## PC14         0.0015449000  1.289707e-03  1.800093e-03
## PC15        -0.0004179924 -6.761995e-04 -1.597853e-04
## PC16         0.0007435102  4.771006e-04  1.009920e-03
## PC17        -0.0002027179 -4.760350e-04  7.059915e-05
## PC18         0.0002628240 -1.655793e-05  5.422059e-04
## PC19        -0.0002062347 -4.949100e-04  8.244059e-05
## PC20         0.0009425082  6.421689e-04  1.242847e-03
## PC21        -0.0010153050 -1.325855e-03 -7.047552e-04
## PC22         0.0041047459  3.780751e-03  4.428741e-03
## PC23        -0.0005419278 -1.188404e-03  1.045486e-04
## PC24        -0.0011782153 -1.892608e-03 -4.638229e-04
## PC25         0.0006344173 -8.273767e-05  1.351572e-03
## PC26        -0.0003120001 -1.024234e-03  4.002338e-04
## PC27        -0.0004700044 -1.186125e-03  2.461158e-04
## PC28         0.0006790259 -4.115660e-05  1.399208e-03
## PC29        -0.0006818032 -1.401244e-03  3.763804e-05
## PC30         0.0003592277 -3.696720e-04  1.088127e-03
## PC32         0.0011994645  4.679920e-04  1.930937e-03
## PC33         0.0002997233 -4.193797e-04  1.018826e-03
## PC34         0.0002616703 -4.722802e-04  9.956209e-04
## PC36         0.0005114395 -2.182976e-04  1.241177e-03
## PC37        -0.0003355461 -1.070123e-03  3.990308e-04
## PC38         0.0004344763 -3.028881e-04  1.171841e-03
## PC39         0.0003451893 -3.939132e-04  1.084292e-03
## PC40         0.0003894461 -3.487638e-04  1.127656e-03
## PC41        -0.0006644526 -1.403295e-03  7.439033e-05
## PC42        -0.0004883539 -1.230427e-03  2.537189e-04
## PC43         0.0009571772  2.217230e-04  1.692631e-03
## PC44        -0.0003387228 -1.077106e-03  3.996600e-04
## PC45         0.0014007629  6.585599e-04  2.142966e-03
## PC46         0.0016938942  9.426549e-04  2.445133e-03
## PC47         0.0003118516 -4.414656e-04  1.065169e-03
## PC48        -0.0003526961 -1.100760e-03  3.953680e-04
## PC49         0.0003890067 -3.598131e-04  1.137827e-03
## PC50        -0.0006686539 -1.417545e-03  8.023759e-05
## PC51         0.0004486878 -3.072433e-04  1.204619e-03
## PC52         0.0001752847 -5.779587e-04  9.285281e-04
## PC53        -0.0003592069 -1.117007e-03  3.985928e-04
## PC55         0.0002028839 -5.612425e-04  9.670104e-04
## PC56         0.0001686360 -5.888921e-04  9.261642e-04
## PC57         0.0004316819 -3.274207e-04  1.190784e-03
## PC58        -0.0004315006 -1.197407e-03  3.344054e-04
## PC59         0.0014939212  7.322477e-04  2.255595e-03
## PC60        -0.0010186293 -1.780994e-03 -2.562647e-04
## PC61        -0.0005928354 -1.360066e-03  1.743947e-04
## PC62        -0.0002034974 -9.701223e-04  5.631275e-04
## PC63         0.0004843173 -2.813009e-04  1.249935e-03
## PC64        -0.0001758589 -9.436420e-04  5.919241e-04
## PC65         0.0009866345  2.196406e-04  1.753628e-03
## PC66         0.0004330656 -3.434311e-04  1.209562e-03
## PC67         0.0006356282 -1.331155e-04  1.404372e-03
## PC68         0.0007790931  7.334388e-06  1.550852e-03
## PC69        -0.0017464448 -2.522593e-03 -9.702964e-04
## PC70         0.0002374992 -5.377967e-04  1.012795e-03
## PC71        -0.0006290740 -1.403397e-03  1.452490e-04
## PC72         0.0005181758 -2.626325e-04  1.298984e-03
## PC73         0.0004983506 -2.793461e-04  1.276047e-03
## PC74        -0.0001663813 -9.491878e-04  6.164253e-04
## PC75         0.0003121803 -4.708293e-04  1.095190e-03
## PC76         0.0012720467  4.906518e-04  2.053442e-03
## PC78        -0.0003994344 -1.184636e-03  3.857668e-04
## PC79         0.0007698287 -1.564983e-05  1.555307e-03
## PC80        -0.0006520201 -1.435448e-03  1.314077e-04
## PC82        -0.0009789431 -1.759943e-03 -1.979436e-04
## PC83         0.0010988317  3.109389e-04  1.886725e-03
## PC85         0.0001660642 -6.255805e-04  9.577089e-04
## PC86         0.0003496346 -4.428536e-04  1.142123e-03
## PC87         0.0004451030 -3.489412e-04  1.239147e-03
## PC88         0.0002318335 -5.562157e-04  1.019883e-03
## PC89         0.0003808273 -4.149632e-04  1.176618e-03
## PC90        -0.0020087691 -2.808799e-03 -1.208740e-03
## PC91         0.0006604596 -1.347978e-04  1.455717e-03
## PC92        -0.0005247861 -1.322298e-03  2.727257e-04
## PC93         0.0002036291 -5.947062e-04  1.001964e-03
## PC94         0.0005510716 -2.462237e-04  1.348367e-03
## PC95        -0.0002462988 -1.047223e-03  5.546255e-04
## PC96        -0.0006872921 -1.491586e-03  1.170019e-04
## PC98         0.0005269882 -2.765694e-04  1.330546e-03
## PC101       -0.0013354257 -2.135917e-03 -5.349344e-04
## PC102        0.0015452159  7.388632e-04  2.351569e-03
## PC104        0.0007226484 -8.558940e-05  1.530886e-03
## PC105        0.0001688957 -6.385070e-04  9.762984e-04
## PC106        0.0013249636  5.150856e-04  2.134842e-03
## PC107        0.0001856730 -6.255479e-04  9.968939e-04
## PC108       -0.0010089374 -1.823089e-03 -1.947857e-04
## PC111       -0.0006854404 -1.503617e-03  1.327362e-04
## PC112        0.0007218671 -1.006344e-04  1.544369e-03
## PC114       -0.0004823893 -1.306658e-03  3.418798e-04
## PC115       -0.0007510769 -1.575450e-03  7.329638e-05
## PC117       -0.0017330605 -2.551275e-03 -9.148456e-04
## PC118       -0.0010582700 -1.884554e-03 -2.319859e-04
## PC119       -0.0010043509 -1.836100e-03 -1.726014e-04
## PC120       -0.0006520716 -1.478225e-03  1.740823e-04
## PC121        0.0007367603 -9.451144e-05  1.568032e-03
## PC123       -0.0007913389 -1.628092e-03  4.541387e-05
## PC124       -0.0002753178 -1.105457e-03  5.548218e-04
## PC125        0.0003233625 -5.102690e-04  1.156994e-03
## PC126        0.0009520484  1.201403e-04  1.783956e-03
## PC127        0.0013921456  5.586443e-04  2.225647e-03
## PC128       -0.0008839796 -1.721751e-03 -4.620797e-05
## PC129       -0.0008380986 -1.681385e-03  5.187439e-06
## PC131        0.0010265727  1.948022e-04  1.858343e-03
## PC132        0.0015360116  6.961791e-04  2.375844e-03
## PC133       -0.0002152622 -1.049469e-03  6.189442e-04
## PC134       -0.0001882865 -1.026806e-03  6.502335e-04
## PC135       -0.0005054907 -1.351296e-03  3.403150e-04
## PC137        0.0003858280 -4.626697e-04  1.234326e-03
## PC138        0.0011356535  2.962514e-04  1.975056e-03
## PC139       -0.0013163614 -2.161652e-03 -4.710707e-04
## PC142        0.0005530691 -2.991333e-04  1.405272e-03
## PC143       -0.0010254284 -1.880288e-03 -1.705686e-04
## PC145        0.0005098362 -3.457919e-04  1.365464e-03
## PC146       -0.0003701883 -1.222235e-03  4.818584e-04
## PC147        0.0009946295  1.389404e-04  1.850319e-03
## PC148        0.0007943120 -6.512855e-05  1.653752e-03
## PC149       -0.0003687632 -1.224515e-03  4.869885e-04
## PC150        0.0003343696 -5.296961e-04  1.198435e-03
## PC151        0.0006582715 -2.057141e-04  1.522257e-03
## PC153       -0.0002331037 -1.100823e-03  6.346156e-04
## PC154        0.0005585424 -3.036037e-04  1.420688e-03
## PC155        0.0006758915 -1.910115e-04  1.542795e-03
## PC156       -0.0013557997 -2.230491e-03 -4.811087e-04
## PC157        0.0011142332  2.427858e-04  1.985681e-03
## PC158       -0.0002926967 -1.163527e-03  5.781336e-04
## PC159       -0.0003955034 -1.261663e-03  4.706565e-04
## PC160        0.0014287912  5.627705e-04  2.294812e-03
## PC161       -0.0001810139 -1.054639e-03  6.926109e-04
## PC162        0.0004204782 -4.506401e-04  1.291596e-03
## PC163        0.0005048545 -3.670057e-04  1.376715e-03
## PC164       -0.0003702884 -1.248583e-03  5.080067e-04
## PC166       -0.0004966723 -1.374447e-03  3.811023e-04
## PC167        0.0004720439 -4.037883e-04  1.347876e-03
## PC170       -0.0007165705 -1.595553e-03  1.624124e-04
## PC171        0.0006382425 -2.376219e-04  1.514107e-03
## PC172        0.0005221531 -3.621022e-04  1.406408e-03
## PC173       -0.0011111284 -1.997185e-03 -2.250724e-04
## PC174        0.0009658581  8.191380e-05  1.849802e-03
## PC175        0.0007042264 -1.852683e-04  1.593721e-03
## PC179       -0.0015329190 -2.427945e-03 -6.378927e-04
## PC180        0.0010984029  1.984913e-04  1.998315e-03
## PC181       -0.0008781256 -1.772668e-03  1.641666e-05
## PC182       -0.0002277114 -1.126785e-03  6.713624e-04
## PC183       -0.0007057617 -1.602555e-03  1.910313e-04
## PC184       -0.0005281246 -1.429204e-03  3.729553e-04
## PC186        0.0009900218  8.573069e-05  1.894313e-03
## PC187        0.0011887661  2.881999e-04  2.089332e-03
## PC188       -0.0007435318 -1.647150e-03  1.600868e-04
## PC189        0.0009600851  4.714479e-05  1.873025e-03
## PC190       -0.0003711605 -1.274941e-03  5.326201e-04
## PC191       -0.0004190125 -1.322567e-03  4.845419e-04
## PC192       -0.0018522366 -2.762783e-03 -9.416901e-04
## PC193        0.0002352162 -6.798789e-04  1.150311e-03

Test

# Evaluate the backward-selection model on the held-out test set.
# isTRUE() is the robust scalar flag test: unlike `x == TRUE` it yields
# FALSE (instead of erroring) when the flag is NA or not length-1 logical.
if (isTRUE(algo.backward.caret)) {
  test.model(model.backward, data.test
             ,method = 'leapBackward', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.039   2.083   2.098   2.097   2.110   2.154 
## [1] "leapBackward  Test MSE: 0.00102836422121638"

Stepwise Selection with CV

Train

# Train a stepwise subset-selection model via caret's "leapSeq" method;
# cross-validation and tuning happen inside train.caret.glmselect().
if (isTRUE(algo.stepwise.caret)) {
  set.seed(1)  # reproducible CV fold assignment
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "leapSeq"
                                    ,feature.names = feature.names)
  model.stepwise <- returned$model
  # `id` is returned by train.caret.glmselect() and forwarded to
  # test.model() later — presumably identifies the selected model/run;
  # confirm semantics in train.caret.glmselect().
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 21 on full training set
## [1] "All models results"
##    nvmax       RMSE   Rsquared        MAE       RMSESD  RsquaredSD        MAESD
## 1      1 0.03596827 0.01527512 0.02784873 0.0008825615 0.006983967 0.0004973760
## 2      2 0.03575392 0.02743041 0.02768393 0.0008225004 0.012005307 0.0003996057
## 3      3 0.03559100 0.03645637 0.02760432 0.0007565589 0.015046580 0.0003491449
## 4      4 0.03521623 0.05820690 0.02725501 0.0008120500 0.026133041 0.0003377191
## 5      5 0.03504569 0.06691536 0.02714897 0.0008683118 0.026560799 0.0003921311
## 6      6 0.03502016 0.06788875 0.02713994 0.0008683515 0.024237027 0.0003823853
## 7      7 0.03494874 0.07154979 0.02709469 0.0008759187 0.023547787 0.0003737225
## 8      8 0.03485154 0.07691164 0.02701881 0.0008845557 0.025484354 0.0004071258
## 9      9 0.03474539 0.08175798 0.02694702 0.0008798285 0.022240966 0.0004188303
## 10    10 0.03473831 0.08205722 0.02691589 0.0008735658 0.021275990 0.0004221505
## 11    11 0.03467281 0.08545058 0.02687814 0.0008590112 0.021741633 0.0004367692
## 12    12 0.03480051 0.07838011 0.02698333 0.0007736157 0.022396497 0.0003830715
## 13    13 0.03479003 0.07967316 0.02698113 0.0010144558 0.025800714 0.0005416168
## 14    14 0.03470861 0.08427002 0.02685605 0.0008561926 0.022475789 0.0003582521
## 15    15 0.03472035 0.08370827 0.02688634 0.0008174997 0.022799803 0.0003234209
## 16    16 0.03473846 0.08240525 0.02693034 0.0008690334 0.023843643 0.0004312217
## 17    17 0.03472856 0.08272831 0.02693944 0.0008762592 0.022490018 0.0004255057
## 18    18 0.03470802 0.08365234 0.02687973 0.0008204037 0.023552122 0.0004172611
## 19    19 0.03471436 0.08330441 0.02689963 0.0007927295 0.019590131 0.0003959860
## 20    20 0.03464868 0.08701104 0.02683826 0.0008400387 0.023194229 0.0003896163
## 21    21 0.03463457 0.08762411 0.02685159 0.0008402589 0.022592445 0.0004321247
## [1] "Best Model"
##    nvmax
## 21    21

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.096700e+00  2.095793e+00  2.097608e+00
## PC1          2.873471e-04  2.265370e-04  3.481572e-04
## PC2         -3.749128e-04 -4.466321e-04 -3.031935e-04
## PC3         -8.988881e-05 -1.757190e-04 -4.058607e-06
## PC4         -9.456845e-05 -1.829814e-04 -6.155477e-06
## PC5         -2.370038e-04 -3.257068e-04 -1.483009e-04
## PC6          9.804339e-05  7.424745e-06  1.886620e-04
## PC7         -1.746993e-04 -2.666232e-04 -8.277543e-05
## PC8         -1.275751e-04 -2.219056e-04 -3.324452e-05
## PC9          8.338440e-05 -1.453646e-05  1.813053e-04
## PC10        -2.575589e-05 -1.266890e-04  7.517724e-05
## PC11         4.798348e-04  3.752480e-04  5.844216e-04
## PC12        -3.711748e-04 -4.763764e-04 -2.659731e-04
## PC13         2.934464e-04  1.849049e-04  4.019878e-04
## PC14         5.422023e-04  4.323119e-04  6.520927e-04
## PC15        -1.240986e-04 -2.372591e-04 -1.093813e-05
## PC16         2.136898e-04  9.582792e-05  3.315516e-04
## PC17        -9.631725e-05 -2.188520e-04  2.621754e-05
## PC18        -8.540462e-05 -2.123825e-04  4.157327e-05
## PC19        -8.935724e-05 -2.220067e-04  4.329220e-05
## PC20         4.024553e-04  2.603635e-04  5.445472e-04
## PC21        -3.428740e-04 -4.912106e-04 -1.945375e-04

Test

# Evaluate the stepwise-selection model on the held-out test set.
# isTRUE() is the robust scalar flag test (safe for NA / non-logical flags).
if (isTRUE(algo.stepwise.caret)) {
  test.model(model.stepwise, data.test
             ,method = 'leapSeq', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.085   2.094   2.097   2.097   2.100   2.109 
## [1] "leapSeq  Test MSE: 0.00128857044529284"

LASSO with CV

Train

# Train a LASSO model (glmnet with alpha fixed at 1) over a log-spaced
# lambda grid spanning [1e-4, 1e-2]; CV happens inside train.caret.glmselect().
if (isTRUE(algo.LASSO.caret)) {
  set.seed(1)  # reproducible CV fold assignment
  # Spell out `length.out` explicitly rather than relying on partial
  # argument matching of `length =`.
  tune.grid <- expand.grid(alpha = 1,
                           lambda = 10^seq(from = -4, to = -2, length.out = 100))
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "glmnet"
                                    ,subopt = 'LASSO'
                                    ,tune.grid = tune.grid
                                    ,feature.names = feature.names)
  model.LASSO.caret <- returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.000231 on full training set
## glmnet 
## 
## 5584 samples
##  193 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE        Rsquared    MAE       
##   0.0001000000  0.03237706  0.20430616  0.02492445
##   0.0001047616  0.03237422  0.20436586  0.02492339
##   0.0001097499  0.03237139  0.20442460  0.02492231
##   0.0001149757  0.03236855  0.20448359  0.02492125
##   0.0001204504  0.03236574  0.20454159  0.02492037
##   0.0001261857  0.03236292  0.20459968  0.02491956
##   0.0001321941  0.03236014  0.20465671  0.02491880
##   0.0001384886  0.03235757  0.20470501  0.02491834
##   0.0001450829  0.03235513  0.20474867  0.02491799
##   0.0001519911  0.03235278  0.20478905  0.02491787
##   0.0001592283  0.03235062  0.20482255  0.02491799
##   0.0001668101  0.03234872  0.20484544  0.02491847
##   0.0001747528  0.03234698  0.20486337  0.02491940
##   0.0001830738  0.03234547  0.20487291  0.02492076
##   0.0001917910  0.03234425  0.20487311  0.02492238
##   0.0002009233  0.03234321  0.20486904  0.02492418
##   0.0002104904  0.03234252  0.20485367  0.02492616
##   0.0002205131  0.03234200  0.20483645  0.02492824
##   0.0002310130  0.03234186  0.20480690  0.02493080
##   0.0002420128  0.03234214  0.20476356  0.02493385
##   0.0002535364  0.03234296  0.20470245  0.02493737
##   0.0002656088  0.03234422  0.20462859  0.02494154
##   0.0002782559  0.03234615  0.20453252  0.02494654
##   0.0002915053  0.03234827  0.20443848  0.02495176
##   0.0003053856  0.03235123  0.20431483  0.02495802
##   0.0003199267  0.03235433  0.20419763  0.02496462
##   0.0003351603  0.03235810  0.20406096  0.02497192
##   0.0003511192  0.03236213  0.20392870  0.02497952
##   0.0003678380  0.03236704  0.20376906  0.02498819
##   0.0003853529  0.03237240  0.20360601  0.02499728
##   0.0004037017  0.03237884  0.20340727  0.02500729
##   0.0004229243  0.03238552  0.20321590  0.02501782
##   0.0004430621  0.03239346  0.20298088  0.02502978
##   0.0004641589  0.03240201  0.20273695  0.02504254
##   0.0004862602  0.03241218  0.20243409  0.02505676
##   0.0005094138  0.03242408  0.20206983  0.02507165
##   0.0005336699  0.03243795  0.20162922  0.02508758
##   0.0005590810  0.03245307  0.20115207  0.02510364
##   0.0005857021  0.03247037  0.20058402  0.02512149
##   0.0006135907  0.03248937  0.19995059  0.02514026
##   0.0006428073  0.03251082  0.19920820  0.02516137
##   0.0006734151  0.03253476  0.19835657  0.02518446
##   0.0007054802  0.03256196  0.19734577  0.02521065
##   0.0007390722  0.03259132  0.19623355  0.02523935
##   0.0007742637  0.03262340  0.19498385  0.02527037
##   0.0008111308  0.03265687  0.19367475  0.02530308
##   0.0008497534  0.03269397  0.19217262  0.02533926
##   0.0008902151  0.03273304  0.19057123  0.02537618
##   0.0009326033  0.03277574  0.18876943  0.02541498
##   0.0009770100  0.03281893  0.18695969  0.02545294
##   0.0010235310  0.03286493  0.18499050  0.02549221
##   0.0010722672  0.03290919  0.18314694  0.02552837
##   0.0011233240  0.03295586  0.18116949  0.02556644
##   0.0011768120  0.03299923  0.17941423  0.02560227
##   0.0012328467  0.03304441  0.17755113  0.02563951
##   0.0012915497  0.03308770  0.17581192  0.02567602
##   0.0013530478  0.03313270  0.17397103  0.02571352
##   0.0014174742  0.03317481  0.17234358  0.02574800
##   0.0014849683  0.03321985  0.17055261  0.02578387
##   0.0015556761  0.03326552  0.16876339  0.02582010
##   0.0016297508  0.03331459  0.16677594  0.02585928
##   0.0017073526  0.03336328  0.16484758  0.02589797
##   0.0017886495  0.03341508  0.16272871  0.02593903
##   0.0018738174  0.03346526  0.16073908  0.02597801
##   0.0019630407  0.03351783  0.15861389  0.02601757
##   0.0020565123  0.03356967  0.15660341  0.02605658
##   0.0021544347  0.03362472  0.15439491  0.02609822
##   0.0022570197  0.03367787  0.15237190  0.02613755
##   0.0023644894  0.03373312  0.15024560  0.02617853
##   0.0024770764  0.03378621  0.14836096  0.02621897
##   0.0025950242  0.03384270  0.14629008  0.02626271
##   0.0027185882  0.03389667  0.14449517  0.02630422
##   0.0028480359  0.03395278  0.14261724  0.02634810
##   0.0029836472  0.03401145  0.14067474  0.02639391
##   0.0031257158  0.03407540  0.13840153  0.02644289
##   0.0032745492  0.03414543  0.13570087  0.02649606
##   0.0034304693  0.03422211  0.13248074  0.02655446
##   0.0035938137  0.03430607  0.12863012  0.02661775
##   0.0037649358  0.03439722  0.12409220  0.02668627
##   0.0039442061  0.03449167  0.11917043  0.02675621
##   0.0041320124  0.03459380  0.11337558  0.02683140
##   0.0043287613  0.03469641  0.10730977  0.02690799
##   0.0045348785  0.03480305  0.10059436  0.02698658
##   0.0047508102  0.03489975  0.09460365  0.02705788
##   0.0049770236  0.03499516  0.08853118  0.02712890
##   0.0052140083  0.03504889  0.08677759  0.02716869
##   0.0054622772  0.03509383  0.08599797  0.02720016
##   0.0057223677  0.03513595  0.08593606  0.02722981
##   0.0059948425  0.03518147  0.08593606  0.02726113
##   0.0062802914  0.03523136  0.08593606  0.02729544
##   0.0065793322  0.03528603  0.08593606  0.02733316
##   0.0068926121  0.03534591  0.08593606  0.02737515
##   0.0072208090  0.03541152  0.08593606  0.02742135
##   0.0075646333  0.03548337  0.08593606  0.02747242
##   0.0079248290  0.03556205  0.08593606  0.02752828
##   0.0083021757  0.03564819  0.08593606  0.02758979
##   0.0086974900  0.03574248  0.08593606  0.02765811
##   0.0091116276  0.03584566  0.08593606  0.02773430
##   0.0095454846  0.03595856  0.08593606  0.02781766
##   0.0100000000  0.03608204  0.08593606  0.02790873
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.000231013.

##    alpha      lambda
## 19     1 0.000231013
##     alpha       lambda       RMSE   Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.0001000000 0.03237706 0.20430616 0.02492445 0.0010514127 0.02932455 0.0006133562
## 2       1 0.0001047616 0.03237422 0.20436586 0.02492339 0.0010528396 0.02941924 0.0006140002
## 3       1 0.0001097499 0.03237139 0.20442460 0.02492231 0.0010543409 0.02951969 0.0006146666
## 4       1 0.0001149757 0.03236855 0.20448359 0.02492125 0.0010558906 0.02962589 0.0006153772
## 5       1 0.0001204504 0.03236574 0.20454159 0.02492037 0.0010575173 0.02973678 0.0006161373
## 6       1 0.0001261857 0.03236292 0.20459968 0.02491956 0.0010592250 0.02985289 0.0006168583
## 7       1 0.0001321941 0.03236014 0.20465671 0.02491880 0.0010610339 0.02997495 0.0006176444
## 8       1 0.0001384886 0.03235757 0.20470501 0.02491834 0.0010628825 0.03010283 0.0006183502
## 9       1 0.0001450829 0.03235513 0.20474867 0.02491799 0.0010648197 0.03023799 0.0006190434
## 10      1 0.0001519911 0.03235278 0.20478905 0.02491787 0.0010668515 0.03037971 0.0006195945
## 11      1 0.0001592283 0.03235062 0.20482255 0.02491799 0.0010688815 0.03052393 0.0006199889
## 12      1 0.0001668101 0.03234872 0.20484544 0.02491847 0.0010709436 0.03066852 0.0006202538
## 13      1 0.0001747528 0.03234698 0.20486337 0.02491940 0.0010730452 0.03081783 0.0006203681
## 14      1 0.0001830738 0.03234547 0.20487291 0.02492076 0.0010751057 0.03097422 0.0006201553
## 15      1 0.0001917910 0.03234425 0.20487311 0.02492238 0.0010772073 0.03113924 0.0006196926
## 16      1 0.0002009233 0.03234321 0.20486904 0.02492418 0.0010793996 0.03131786 0.0006191212
## 17      1 0.0002104904 0.03234252 0.20485367 0.02492616 0.0010817312 0.03151026 0.0006185582
## 18      1 0.0002205131 0.03234200 0.20483645 0.02492824 0.0010840580 0.03172448 0.0006178439
## 19      1 0.0002310130 0.03234186 0.20480690 0.02493080 0.0010861678 0.03193967 0.0006170199
## 20      1 0.0002420128 0.03234214 0.20476356 0.02493385 0.0010882200 0.03216832 0.0006166432
## 21      1 0.0002535364 0.03234296 0.20470245 0.02493737 0.0010902271 0.03240330 0.0006163316
## 22      1 0.0002656088 0.03234422 0.20462859 0.02494154 0.0010923853 0.03264617 0.0006163420
## 23      1 0.0002782559 0.03234615 0.20453252 0.02494654 0.0010946732 0.03290103 0.0006164919
## 24      1 0.0002915053 0.03234827 0.20443848 0.02495176 0.0010972763 0.03316684 0.0006167593
## 25      1 0.0003053856 0.03235123 0.20431483 0.02495802 0.0011000187 0.03343912 0.0006168460
## 26      1 0.0003199267 0.03235433 0.20419763 0.02496462 0.0011030099 0.03369382 0.0006167258
## 27      1 0.0003351603 0.03235810 0.20406096 0.02497192 0.0011058002 0.03394524 0.0006164039
## 28      1 0.0003511192 0.03236213 0.20392870 0.02497952 0.0011088865 0.03420316 0.0006160391
## 29      1 0.0003678380 0.03236704 0.20376906 0.02498819 0.0011117434 0.03446090 0.0006155972
## 30      1 0.0003853529 0.03237240 0.20360601 0.02499728 0.0011149485 0.03473673 0.0006156226
## 31      1 0.0004037017 0.03237884 0.20340727 0.02500729 0.0011180631 0.03501615 0.0006155321
## 32      1 0.0004229243 0.03238552 0.20321590 0.02501782 0.0011211818 0.03530673 0.0006156554
## 33      1 0.0004430621 0.03239346 0.20298088 0.02502978 0.0011240586 0.03558958 0.0006157740
## 34      1 0.0004641589 0.03240201 0.20273695 0.02504254 0.0011272084 0.03588173 0.0006162386
## 35      1 0.0004862602 0.03241218 0.20243409 0.02505676 0.0011302948 0.03618185 0.0006168679
## 36      1 0.0005094138 0.03242408 0.20206983 0.02507165 0.0011336266 0.03652588 0.0006175854
## 37      1 0.0005336699 0.03243795 0.20162922 0.02508758 0.0011371486 0.03689380 0.0006185095
## 38      1 0.0005590810 0.03245307 0.20115207 0.02510364 0.0011403905 0.03731909 0.0006195913
## 39      1 0.0005857021 0.03247037 0.20058402 0.02512149 0.0011429833 0.03774235 0.0006203140
## 40      1 0.0006135907 0.03248937 0.19995059 0.02514026 0.0011455028 0.03819463 0.0006201612
## 41      1 0.0006428073 0.03251082 0.19920820 0.02516137 0.0011472909 0.03862974 0.0006193141
## 42      1 0.0006734151 0.03253476 0.19835657 0.02518446 0.0011491989 0.03908645 0.0006184803
## 43      1 0.0007054802 0.03256196 0.19734577 0.02521065 0.0011507298 0.03952879 0.0006169153
## 44      1 0.0007390722 0.03259132 0.19623355 0.02523935 0.0011509517 0.03990938 0.0006149562
## 45      1 0.0007742637 0.03262340 0.19498385 0.02527037 0.0011496376 0.04021995 0.0006114965
## 46      1 0.0008111308 0.03265687 0.19367475 0.02530308 0.0011491543 0.04055876 0.0006084908
## 47      1 0.0008497534 0.03269397 0.19217262 0.02533926 0.0011479245 0.04087281 0.0006057048
## 48      1 0.0008902151 0.03273304 0.19057123 0.02537618 0.0011468800 0.04119751 0.0006030012
## 49      1 0.0009326033 0.03277574 0.18876943 0.02541498 0.0011444862 0.04147823 0.0005993673
## 50      1 0.0009770100 0.03281893 0.18695969 0.02545294 0.0011406428 0.04173480 0.0005945683
## 51      1 0.0010235310 0.03286493 0.18499050 0.02549221 0.0011348747 0.04189457 0.0005881570
## 52      1 0.0010722672 0.03290919 0.18314694 0.02552837 0.0011303879 0.04202610 0.0005843501
## 53      1 0.0011233240 0.03295586 0.18116949 0.02556644 0.0011248000 0.04210029 0.0005806292
## 54      1 0.0011768120 0.03299923 0.17941423 0.02560227 0.0011214835 0.04215963 0.0005796697
## 55      1 0.0012328467 0.03304441 0.17755113 0.02563951 0.0011169182 0.04211257 0.0005772155
## 56      1 0.0012915497 0.03308770 0.17581192 0.02567602 0.0011121959 0.04205755 0.0005760730
## 57      1 0.0013530478 0.03313270 0.17397103 0.02571352 0.0011055508 0.04185912 0.0005735190
## 58      1 0.0014174742 0.03317481 0.17234358 0.02574800 0.0011016730 0.04175650 0.0005723677
## 59      1 0.0014849683 0.03321985 0.17055261 0.02578387 0.0010968549 0.04159783 0.0005715562
## 60      1 0.0015556761 0.03326552 0.16876339 0.02582010 0.0010934042 0.04151258 0.0005700238
## 61      1 0.0016297508 0.03331459 0.16677594 0.02585928 0.0010894229 0.04134831 0.0005683455
## 62      1 0.0017073526 0.03336328 0.16484758 0.02589797 0.0010872791 0.04112067 0.0005678275
## 63      1 0.0017886495 0.03341508 0.16272871 0.02593903 0.0010843628 0.04076539 0.0005653081
## 64      1 0.0018738174 0.03346526 0.16073908 0.02597801 0.0010818388 0.04033568 0.0005616318
## 65      1 0.0019630407 0.03351783 0.15861389 0.02601757 0.0010785359 0.03982554 0.0005566096
## 66      1 0.0020565123 0.03356967 0.15660341 0.02605658 0.0010748808 0.03942911 0.0005518545
## 67      1 0.0021544347 0.03362472 0.15439491 0.02609822 0.0010697429 0.03889112 0.0005468481
## 68      1 0.0022570197 0.03367787 0.15237190 0.02613755 0.0010647175 0.03833275 0.0005427934
## 69      1 0.0023644894 0.03373312 0.15024560 0.02617853 0.0010592035 0.03773197 0.0005373789
## 70      1 0.0024770764 0.03378621 0.14836096 0.02621897 0.0010584422 0.03735209 0.0005358163
## 71      1 0.0025950242 0.03384270 0.14629008 0.02626271 0.0010575406 0.03693044 0.0005357528
## 72      1 0.0027185882 0.03389667 0.14449517 0.02630422 0.0010570521 0.03646255 0.0005376355
## 73      1 0.0028480359 0.03395278 0.14261724 0.02634810 0.0010545214 0.03578823 0.0005379240
## 74      1 0.0029836472 0.03401145 0.14067474 0.02639391 0.0010523806 0.03518016 0.0005388831
## 75      1 0.0031257158 0.03407540 0.13840153 0.02644289 0.0010504946 0.03446554 0.0005402566
## 76      1 0.0032745492 0.03414543 0.13570087 0.02649606 0.0010488623 0.03364500 0.0005416792
## 77      1 0.0034304693 0.03422211 0.13248074 0.02655446 0.0010475280 0.03270129 0.0005430062
## 78      1 0.0035938137 0.03430607 0.12863012 0.02661775 0.0010465418 0.03161573 0.0005451256
## 79      1 0.0037649358 0.03439722 0.12409220 0.02668627 0.0010452507 0.03042474 0.0005487168
## 80      1 0.0039442061 0.03449167 0.11917043 0.02675621 0.0010430610 0.02922341 0.0005534763
## 81      1 0.0041320124 0.03459380 0.11337558 0.02683140 0.0010404899 0.02786245 0.0005586014
## 82      1 0.0043287613 0.03469641 0.10730977 0.02690799 0.0010390709 0.02629000 0.0005641951
## 83      1 0.0045348785 0.03480305 0.10059436 0.02698658 0.0010351995 0.02458155 0.0005674549
## 84      1 0.0047508102 0.03489975 0.09460365 0.02705788 0.0010295657 0.02323690 0.0005659152
## 85      1 0.0049770236 0.03499516 0.08853118 0.02712890 0.0010221475 0.02234025 0.0005617542
## 86      1 0.0052140083 0.03504889 0.08677759 0.02716869 0.0010217755 0.02312308 0.0005612301
## 87      1 0.0054622772 0.03509383 0.08599797 0.02720016 0.0010152048 0.02278463 0.0005546133
## 88      1 0.0057223677 0.03513595 0.08593606 0.02722981 0.0010110639 0.02279859 0.0005500080
## 89      1 0.0059948425 0.03518147 0.08593606 0.02726113 0.0010073233 0.02279859 0.0005460736
## 90      1 0.0062802914 0.03523136 0.08593606 0.02729544 0.0010034738 0.02279859 0.0005422876
## 91      1 0.0065793322 0.03528603 0.08593606 0.02733316 0.0009995203 0.02279859 0.0005383386
## 92      1 0.0068926121 0.03534591 0.08593606 0.02737515 0.0009954688 0.02279859 0.0005340838
## 93      1 0.0072208090 0.03541152 0.08593606 0.02742135 0.0009913278 0.02279859 0.0005304021
## 94      1 0.0075646333 0.03548337 0.08593606 0.02747242 0.0009871074 0.02279859 0.0005269654
## 95      1 0.0079248290 0.03556205 0.08593606 0.02752828 0.0009828205 0.02279859 0.0005229650
## 96      1 0.0083021757 0.03564819 0.08593606 0.02758979 0.0009784826 0.02279859 0.0005192337
## 97      1 0.0086974900 0.03574248 0.08593606 0.02765811 0.0009741127 0.02279859 0.0005146450
## 98      1 0.0091116276 0.03584566 0.08593606 0.02773430 0.0009697332 0.02279859 0.0005109178
## 99      1 0.0095454846 0.03595856 0.08593606 0.02781766 0.0009653706 0.02279859 0.0005080032
## 100     1 0.0100000000 0.03608204 0.08593606 0.02790873 0.0009610561 0.02279859 0.0005050122

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  2.096649e+00
## PC1         -8.552939e-04
## PC2          1.153567e-03
## PC3         -2.202709e-04
## PC4          8.766908e-05
## PC5          5.448535e-04
## PC6          5.146934e-05
## PC7         -4.384383e-04
## PC8         -2.506612e-04
## PC9         -1.574137e-04
## PC10         1.791317e-04
## PC11        -1.360142e-03
## PC12         3.097931e-04
## PC13         3.727582e-04
## PC14         1.476773e-03
## PC15        -3.591134e-04
## PC16         6.651474e-04
## PC17        -1.286334e-04
## PC18         1.975169e-04
## PC19        -1.229127e-04
## PC20         8.639884e-04
## PC21        -9.266945e-04
## PC22         4.010499e-03
## PC23        -3.703671e-04
## PC24        -9.568813e-04
## PC25         4.248815e-04
## PC26        -1.270067e-04
## PC27        -2.929429e-04
## PC28         5.043291e-04
## PC29        -4.526215e-04
## PC30         1.719691e-04
## PC32         9.724653e-04
## PC33         9.875243e-05
## PC34         5.967370e-05
## PC36         3.010105e-04
## PC37        -1.365698e-04
## PC38         2.186315e-04
## PC39         1.473620e-04
## PC40         1.661558e-04
## PC41        -4.707161e-04
## PC42        -2.572615e-04
## PC43         7.668451e-04
## PC44        -1.606785e-04
## PC45         1.200868e-03
## PC46         1.481650e-03
## PC47         6.966043e-05
## PC48        -1.381915e-04
## PC49         1.746773e-04
## PC50        -4.463293e-04
## PC51         2.562741e-04
## PC53        -1.716000e-04
## PC55         2.520655e-06
## PC57         2.190658e-04
## PC58        -2.118000e-04
## PC59         1.269728e-03
## PC60        -8.151021e-04
## PC61        -3.531730e-04
## PC62        -1.698639e-06
## PC63         2.773226e-04
## PC64        -5.534494e-07
## PC65         7.919730e-04
## PC66         2.039270e-04
## PC67         4.016175e-04
## PC68         5.829741e-04
## PC69        -1.501312e-03
## PC70         1.371337e-05
## PC71        -4.202176e-04
## PC72         2.961225e-04
## PC73         2.908718e-04
## PC75         1.147186e-04
## PC76         1.081418e-03
## PC78        -2.037357e-04
## PC79         5.725600e-04
## PC80        -4.490785e-04
## PC82        -7.736292e-04
## PC83         8.909645e-04
## PC86         1.503877e-04
## PC87         2.275243e-04
## PC88         1.064817e-05
## PC89         1.384727e-04
## PC90        -1.791047e-03
## PC91         4.661734e-04
## PC92        -3.156853e-04
## PC93         5.042043e-07
## PC94         3.240270e-04
## PC95        -1.319599e-05
## PC96        -4.396468e-04
## PC98         2.726262e-04
## PC101       -1.118744e-03
## PC102        1.295157e-03
## PC104        4.924657e-04
## PC106        1.084582e-03
## PC108       -7.910458e-04
## PC111       -4.649591e-04
## PC112        4.821355e-04
## PC114       -2.433278e-04
## PC115       -5.352010e-04
## PC117       -1.556362e-03
## PC118       -8.240139e-04
## PC119       -7.864255e-04
## PC120       -4.085304e-04
## PC121        4.902701e-04
## PC123       -5.787692e-04
## PC124       -2.825924e-05
## PC125        7.477971e-05
## PC126        7.158206e-04
## PC127        1.125715e-03
## PC128       -6.252518e-04
## PC129       -6.190280e-04
## PC131        8.181008e-04
## PC132        1.295630e-03
## PC133       -1.544047e-05
## PC135       -2.886038e-04
## PC137        1.581159e-04
## PC138        9.305620e-04
## PC139       -1.082998e-03
## PC142        3.164800e-04
## PC143       -7.828669e-04
## PC145        2.954022e-04
## PC146       -1.370641e-04
## PC147        8.056341e-04
## PC148        5.298159e-04
## PC149       -1.239111e-04
## PC150        1.221041e-04
## PC151        3.980184e-04
## PC153       -7.176657e-07
## PC154        3.315515e-04
## PC155        4.311093e-04
## PC156       -1.111438e-03
## PC157        8.856272e-04
## PC158       -6.324901e-05
## PC159       -1.747399e-04
## PC160        1.185070e-03
## PC162        1.680543e-04
## PC163        2.548350e-04
## PC164       -1.411706e-04
## PC166       -2.841743e-04
## PC167        2.257563e-04
## PC170       -4.882636e-04
## PC171        3.964357e-04
## PC172        2.779558e-04
## PC173       -8.520678e-04
## PC174        7.146662e-04
## PC175        4.435241e-04
## PC179       -1.271227e-03
## PC180        8.646436e-04
## PC181       -6.114437e-04
## PC183       -4.543279e-04
## PC184       -3.025662e-04
## PC186        7.530073e-04
## PC187        9.835241e-04
## PC188       -4.880449e-04
## PC189        7.120546e-04
## PC190       -1.001691e-04
## PC191       -1.854777e-04
## PC192       -1.607936e-03
## PC193        6.808670e-07

Test

# Evaluate the LASSO (glmnet) model on the held-out test set.
# NOTE(review): unlike the leapBackward/leapSeq chunks, no `id` argument
# is passed here — presumably test.model() has a default for glmnet;
# confirm against its definition.
if (isTRUE(algo.LASSO.caret)) {
  test.model(model.LASSO.caret, data.test
             ,method = 'glmnet', subopt = "LASSO"
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.044   2.085   2.098   2.097   2.108   2.148 
## [1] "glmnet LASSO Test MSE: 0.00101664585720616"

LARS with CV

Train

# Train a Least Angle Regression (LARS) model via caret; CV happens
# inside train.caret.glmselect().
if (isTRUE(algo.LARS.caret)) {
  set.seed(1)  # reproducible CV fold assignment
  # Bug fix: the original passed the *string* 'NULL' as subopt; the
  # sibling chunks (leapBackward/leapSeq) pass the sentinel NULL, which
  # is what a "no sub-option" default should receive.
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "lars"
                                    ,subopt = NULL
                                    ,feature.names = feature.names)
  model.LARS.caret <- returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.747 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  193 predictor
## 
## Pre-processing: centered (193), scaled (193) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE        Rsquared    MAE       
##   0.00000000  0.03622572         NaN  0.02801456
##   0.01010101  0.03579651  0.08593606  0.02769814
##   0.02020202  0.03543303  0.08593606  0.02743616
##   0.03030303  0.03513734  0.08593606  0.02723025
##   0.04040404  0.03492081  0.09292322  0.02707642
##   0.05050505  0.03471463  0.10633188  0.02692098
##   0.06060606  0.03452919  0.11715036  0.02678377
##   0.07070707  0.03435318  0.12651740  0.02665434
##   0.08080808  0.03418875  0.13406136  0.02653039
##   0.09090909  0.03403827  0.13984218  0.02641534
##   0.10101010  0.03390498  0.14421970  0.02631187
##   0.11111111  0.03379097  0.14808896  0.02622324
##   0.12121212  0.03368512  0.15202995  0.02614485
##   0.13131313  0.03359092  0.15567150  0.02607577
##   0.14141414  0.03350762  0.15895154  0.02601259
##   0.15151515  0.03343053  0.16206968  0.02595238
##   0.16161616  0.03335839  0.16498155  0.02589531
##   0.17171717  0.03329494  0.16752295  0.02584597
##   0.18181818  0.03323838  0.16979514  0.02580058
##   0.19191919  0.03318980  0.17171132  0.02576109
##   0.20202020  0.03314673  0.17338649  0.02572657
##   0.21212121  0.03310662  0.17498865  0.02569319
##   0.22222222  0.03306855  0.17653569  0.02566084
##   0.23232323  0.03303134  0.17806719  0.02562945
##   0.24242424  0.03299545  0.17953749  0.02559926
##   0.25252525  0.03296055  0.18096702  0.02557055
##   0.26262626  0.03292537  0.18244217  0.02554183
##   0.27272727  0.03289065  0.18390816  0.02551342
##   0.28282828  0.03285800  0.18527186  0.02548648
##   0.29292929  0.03282640  0.18659306  0.02545967
##   0.30303030  0.03279534  0.18791242  0.02543260
##   0.31313131  0.03276478  0.18921193  0.02540512
##   0.32323232  0.03273555  0.19045354  0.02537848
##   0.33333333  0.03270797  0.19160312  0.02535271
##   0.34343434  0.03268225  0.19265187  0.02532831
##   0.35353535  0.03265811  0.19362329  0.02530480
##   0.36363636  0.03263520  0.19453651  0.02528254
##   0.37373737  0.03261375  0.19537813  0.02526180
##   0.38383838  0.03259353  0.19616266  0.02524209
##   0.39393939  0.03257438  0.19689504  0.02522335
##   0.40404040  0.03255642  0.19757337  0.02520563
##   0.41414141  0.03253961  0.19819752  0.02518942
##   0.42424242  0.03252407  0.19875825  0.02517437
##   0.43434343  0.03250939  0.19928354  0.02516007
##   0.44444444  0.03249594  0.19975109  0.02514685
##   0.45454545  0.03248362  0.20016384  0.02513494
##   0.46464646  0.03247214  0.20054030  0.02512351
##   0.47474747  0.03246120  0.20089936  0.02511239
##   0.48484848  0.03245112  0.20122265  0.02510186
##   0.49494949  0.03244142  0.20153542  0.02509172
##   0.50505051  0.03243218  0.20183167  0.02508163
##   0.51515152  0.03242357  0.20210628  0.02507167
##   0.52525253  0.03241571  0.20235246  0.02506201
##   0.53535354  0.03240854  0.20257241  0.02505258
##   0.54545455  0.03240211  0.20276120  0.02504352
##   0.55555556  0.03239638  0.20292318  0.02503505
##   0.56565657  0.03239112  0.20306993  0.02502725
##   0.57575758  0.03238608  0.20321328  0.02501955
##   0.58585859  0.03238159  0.20333746  0.02501213
##   0.59595960  0.03237730  0.20345902  0.02500536
##   0.60606061  0.03237288  0.20359502  0.02499845
##   0.61616162  0.03236890  0.20371738  0.02499187
##   0.62626263  0.03236521  0.20383334  0.02498557
##   0.63636364  0.03236174  0.20394536  0.02497937
##   0.64646465  0.03235884  0.20403846  0.02497368
##   0.65656566  0.03235622  0.20412754  0.02496832
##   0.66666667  0.03235335  0.20423544  0.02496273
##   0.67676768  0.03235069  0.20433981  0.02495745
##   0.68686869  0.03234845  0.20443163  0.02495254
##   0.69696970  0.03234660  0.20451169  0.02494807
##   0.70707071  0.03234493  0.20459153  0.02494369
##   0.71717172  0.03234344  0.20466941  0.02493953
##   0.72727273  0.03234250  0.20472876  0.02493597
##   0.73737374  0.03234190  0.20477894  0.02493288
##   0.74747475  0.03234152  0.20482626  0.02492976
##   0.75757576  0.03234153  0.20486231  0.02492706
##   0.76767677  0.03234200  0.20488309  0.02492503
##   0.77777778  0.03234284  0.20489446  0.02492306
##   0.78787879  0.03234395  0.20490230  0.02492113
##   0.79797980  0.03234544  0.20490057  0.02491931
##   0.80808081  0.03234740  0.20488352  0.02491799
##   0.81818182  0.03234977  0.20485545  0.02491734
##   0.82828283  0.03235258  0.20481460  0.02491708
##   0.83838384  0.03235585  0.20475975  0.02491739
##   0.84848485  0.03235965  0.20468891  0.02491809
##   0.85858586  0.03236408  0.20459774  0.02491933
##   0.86868687  0.03236886  0.20449895  0.02492089
##   0.87878788  0.03237407  0.20438947  0.02492290
##   0.88888889  0.03237977  0.20426702  0.02492511
##   0.89898990  0.03238604  0.20412817  0.02492782
##   0.90909091  0.03239271  0.20398089  0.02493086
##   0.91919192  0.03239962  0.20383101  0.02493410
##   0.92929293  0.03240708  0.20366371  0.02493761
##   0.93939394  0.03241505  0.20348047  0.02494137
##   0.94949495  0.03242366  0.20327811  0.02494550
##   0.95959596  0.03243282  0.20306139  0.02495009
##   0.96969697  0.03244217  0.20284564  0.02495487
##   0.97979798  0.03245189  0.20262368  0.02496004
##   0.98989899  0.03246205  0.20239026  0.02496558
##   1.00000000  0.03247255  0.20215266  0.02497147
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.7474747.

##     fraction
## 75 0.7474747
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##           PC1           PC2           PC3           PC4           PC5           PC6           PC7           PC8 
## -3.988170e-03  4.878684e-03 -8.526731e-04  3.351069e-04  2.063808e-03  1.947503e-04 -1.616729e-03 -9.132621e-04 
##           PC9          PC10          PC11          PC12          PC13          PC14          PC15          PC16 
## -5.584878e-04  6.263828e-04 -4.627169e-03  1.049853e-03  1.236646e-03  4.831032e-03 -1.162506e-03  2.086156e-03 
##          PC17          PC18          PC19          PC20          PC21          PC22          PC23          PC24 
## -3.958367e-04  5.923466e-04 -3.585792e-04  2.400774e-03 -2.493692e-03  1.032526e-02 -4.810035e-04 -1.120536e-03 
##          PC25          PC26          PC27          PC28          PC29          PC30          PC32          PC33 
##  4.976604e-04 -1.517691e-04 -3.440365e-04  5.869243e-04 -5.284502e-04  2.000232e-04  1.112857e-03  1.178340e-04 
##          PC34          PC36          PC37          PC38          PC39          PC40          PC41          PC42 
##  7.131323e-05  3.474291e-04 -1.583886e-04  2.508781e-04  1.695014e-04  1.913932e-04 -5.345142e-04 -2.927025e-04 
##          PC43          PC44          PC45          PC46          PC47          PC48          PC49          PC50 
##  8.726772e-04 -1.844022e-04  1.352384e-03  1.648339e-03  8.091788e-05 -1.574762e-04  1.981162e-04 -5.009031e-04 
##          PC51          PC53          PC55          PC57          PC58          PC59          PC60          PC61 
##  2.858644e-04 -1.918910e-04  5.930016e-06  2.440868e-04 -2.341775e-04  1.393512e-03 -8.950231e-04 -3.878946e-04 
##          PC62          PC63          PC65          PC66          PC67          PC68          PC69          PC70 
## -5.116714e-06  3.054729e-04  8.643203e-04  2.226328e-04  4.391479e-04  6.332550e-04 -1.618074e-03  1.827845e-05 
##          PC71          PC72          PC73          PC75          PC76          PC78          PC79          PC80 
## -4.559736e-04  3.199434e-04  3.153452e-04  1.253314e-04  1.157187e-03 -2.192798e-04  6.111079e-04 -4.809628e-04 
##          PC82          PC83          PC86          PC87          PC88          PC89          PC90          PC91 
## -8.290339e-04  9.461530e-04  1.613940e-04  2.423663e-04  1.466890e-05  1.487433e-04 -1.871006e-03  4.921081e-04 
##          PC92          PC94          PC95          PC96          PC98         PC101         PC102         PC104 
## -3.334190e-04  3.424129e-04 -1.717325e-05 -4.598081e-04  2.868717e-04 -1.168433e-03  1.343661e-03  5.117444e-04 
##         PC106         PC108         PC111         PC112         PC114         PC115         PC117         PC118 
##  1.119986e-03 -8.131731e-04 -4.773246e-04  4.927533e-04 -2.497992e-04 -5.444345e-04 -1.587782e-03 -8.355620e-04 
##         PC119         PC120         PC121         PC123         PC124         PC125         PC126         PC127 
## -7.921260e-04 -4.161067e-04  4.956362e-04 -5.799475e-04 -3.204001e-05  7.834718e-05  7.212180e-04  1.130150e-03 
##         PC128         PC129         PC131         PC132         PC133         PC135         PC137         PC138 
## -6.265549e-04 -6.154159e-04  8.233144e-04  1.290396e-03 -1.837451e-05 -2.878875e-04  1.587588e-04  9.278639e-04 
##         PC139         PC142         PC143         PC145         PC146         PC147         PC148         PC149 
## -1.072457e-03  3.132648e-04 -7.674173e-04  2.911172e-04 -1.374675e-04  7.882446e-04  5.179070e-04 -1.243735e-04 
##         PC150         PC151         PC154         PC155         PC156         PC157         PC158         PC159 
##  1.210071e-04  3.881120e-04  3.239376e-04  4.182292e-04 -1.063884e-03  8.514616e-04 -6.381080e-05 -1.712888e-04 
##         PC160         PC162         PC163         PC164         PC166         PC167         PC170         PC171 
##  1.144745e-03  1.644135e-04  2.473996e-04 -1.373362e-04 -2.730325e-04  2.184768e-04 -4.663746e-04  3.807047e-04 
##         PC172         PC173         PC174         PC175         PC179         PC180         PC181         PC183 
##  2.653811e-04 -8.055084e-04  6.775518e-04  4.192216e-04 -1.188054e-03  8.043386e-04 -5.737800e-04 -4.257257e-04 
##         PC184         PC186         PC187         PC188         PC189         PC190         PC191         PC192 
## -2.828977e-04  6.971735e-04  9.139870e-04 -4.537524e-04  6.541310e-04 -9.615013e-05 -1.741908e-04 -1.476756e-03

Test Set Evaluation (LARS)

# Evaluate the caret-tuned LARS model on the held-out test set.
# test.model() (project helper) prints a summary of the predictions and the
# test MSE; `transformation = t` passes the inverse-transform used to map
# log-scale predictions back to the original scale (t is a project variable
# defined earlier, not base::t — TODO consider renaming to avoid the shadow).
if (algo.LARS.caret) {  # flag is logical; comparing `== TRUE` is redundant
  test.model(model.LARS.caret, data.test
             ,method = 'lars',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.044   2.084   2.098   2.097   2.108   2.148 
## [1] "lars  Test MSE: 0.00101671187293065"

Session Info

# Record R version, platform, locale, and package versions for reproducibility.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2             knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3            
##  [5] lars_1.2                   doParallel_1.0.14          iterators_1.0.10           caret_6.0-81              
##  [9] leaps_3.0                  ggforce_0.1.3              rlist_0.4.6.1              car_3.0-2                 
## [13] carData_3.0-2              bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0           
## [17] caTools_1.17.1.1           mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1           
## [21] ggstance_0.3.1             lattice_0.20-35            DT_0.5                     ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-14             
## [29] MASS_7.3-50                PerformanceAnalytics_1.5.2 xts_0.11-2                 zoo_1.8-4                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.7.8                purrr_0.2.5               
## [37] readr_1.3.1                tidyr_0.8.2                tibble_1.4.2               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-4               sp_1.3-1                  
## [45] pacman_0.5.0              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.2.0       backports_1.1.3    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.1      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.18      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_2.0.0        crayon_1.3.4       jsonlite_1.5       bindr_0.1.1        survival_2.42-3    glue_1.3.0        
## [25] registry_0.5       gtable_0.2.0       ppcor_1.1          ipred_0.9-8        abind_1.4-5        rngtools_1.3.1    
## [31] bibtex_0.4.2       Rcpp_1.0.0         xtable_1.8-3       units_0.6-2        foreign_0.8-70     stats4_3.5.1      
## [37] lava_1.6.4         prodlim_2018.04.18 htmlwidgets_1.3    httr_1.4.0         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.5       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.1        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.5.1        evaluate_0.12      ggdendro_0.1-20    yaml_2.2.0         ModelMetrics_1.2.2
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.6           xml2_1.2.0         compiler_3.5.1    
## [67] rstudioapi_0.8     curl_3.2           tweenr_1.0.1       stringi_1.2.4      gdtools_0.1.7      pillar_1.3.1      
## [73] data.table_1.11.8  bitops_1.0-6       insight_0.1.2      httpuv_1.4.5       R6_2.3.0           promises_1.0.1    
## [79] gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0   pkgmaker_0.27      withr_2.1.2       
## [85] nortest_1.0-4      mgcv_1.8-24        hms_0.4.2          quadprog_1.5-5     grid_3.5.1         rpart_4.1-13      
## [91] timeDate_3043.102  class_7.3-14       rmarkdown_1.11     shiny_1.2.0        lubridate_1.7.4